# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the dataset into a Pandas DataFrame.
# Replace 'path_to_csv' with the actual path to your downloaded CSV file.
data = pd.read_csv('path_to_csv')

# Input columns used for prediction: one numeric column plus three
# categorical secondary-fuel columns that are one-hot encoded below.
numeric_features = ['capacity_mw']
categorical_features = ['other_fuel1', 'other_fuel2', 'other_fuel3']
features = numeric_features + categorical_features

# Remove rows with missing values in 'capacity_mw' or 'primary_fuel'.
data = data.dropna(subset=['capacity_mw', 'primary_fuel'])

# Encode categorical fuel types using one-hot encoding. Each categorical
# column is replaced by '<column>_<value>' indicator columns; rows where the
# original value is missing get all-zero indicators.
data = pd.get_dummies(data, columns=categorical_features)

# Restrict the model inputs to the selected features only. Using every
# remaining column of the CSV would feed unrelated (and possibly
# non-numeric) columns to the classifier, which cannot be fitted on text.
dummy_prefixes = tuple(f'{col}_' for col in categorical_features)
feature_columns = [
    col for col in data.columns
    if col in numeric_features or col.startswith(dummy_prefixes)
]

# Split data into features (X) and target (y).
X = data[feature_columns]
y = data['primary_fuel']

# Split data into training and testing sets (80% / 20%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a RandomForestClassifier.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set.
y_pred = clf.predict(X_test)

# Calculate accuracy on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Example prediction for capacity_mw.
# The row is aligned to the training columns so its width and order always
# match what the model was fitted on; every one-hot column defaults to 0.
# Replace with your own data: to mark a secondary fuel, add the matching
# '<column>_<value>' key (as it appears in X.columns) with value 1.
example_capacity = pd.DataFrame([{'capacity_mw': 100}]).reindex(
    columns=X.columns, fill_value=0
)
predicted_fuel = clf.predict(example_capacity)
print(f'Predicted Primary Fuel: {predicted_fuel[0]}')
