<a href="https://colab.research.google.com/github/kaihuan-huang/AAI-York/blob/main/Applied_Artificial_Intelligence_SUMMATIVE_ASSESSMENT_BRIEF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Step 1: Data Preprocessing
# Load the CO2 emissions dataset.
# The path is relative to the working directory, the same way the renewable
# energy dataset is located below. An absolute path into one user's Desktop
# folder does not exist on other machines (e.g. a hosted notebook runtime)
# and makes the load fail with FileNotFoundError there.
co2_data = pd.read_csv('co2_emissions.csv')

# Load the renewable energy consumption dataset
renewable_data = pd.read_csv('renewable_energy_consumption.csv')

# Merge the datasets on the shared 'country' and 'year' columns.
# pd.merge performs an inner join by default, so only (country, year) pairs
# present in both files are kept.
merged_data = pd.merge(co2_data, renewable_data, on=['country', 'year'])

# Split the data into features and target variable: the identifier columns
# and the target column itself are removed from the feature matrix.
features = merged_data.drop(['country', 'year', 'CO2_emissions'], axis=1)
target = merged_data['CO2_emissions']

# Step 2: Model 1 - Full Dataset
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Baseline: a random forest regressor trained on every available feature.
# fit() returns the estimator itself, so construction and training are chained.
model_1 = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Score the baseline on the held-out rows
predictions_1 = model_1.predict(X_test)
mse_1 = mean_squared_error(y_true=y_test, y_pred=predictions_1)
print('Model 1 MSE:', mse_1)

# Step 3: Feature Selection
# Recursive feature elimination (RFE) with a random forest as the ranking
# estimator, keeping 5 features.
from sklearn.feature_selection import RFE

rfe = RFE(estimator=RandomForestRegressor(random_state=42), n_features_to_select=5)
# Fit the selector on the training split only. Fitting on the full dataset
# would let the held-out test rows influence which features are chosen,
# leaking test information into Model 2 and biasing its test MSE downwards.
rfe.fit(X_train, y_train)

# Get the selected features: rfe.support_ is a boolean mask over the columns
# the selector was fitted on.
selected_features = X_train.columns[rfe.support_]

# Step 4: Model 2 - Subset of Relevant Features
# Reuse the existing train/test split, restricted to the selected columns,
# so both models are compared on exactly the same rows.
X_train_sel, X_test_sel = (
    split[selected_features] for split in (X_train, X_test)
)

# Same estimator configuration as the baseline; only the inputs differ.
model_2 = RandomForestRegressor(random_state=42).fit(X_train_sel, y_train)

# Score the reduced model on the held-out rows
predictions_2 = model_2.predict(X_test_sel)
mse_2 = mean_squared_error(y_true=y_test, y_pred=predictions_2)
print('Model 2 MSE:', mse_2)

# Step 5: Evaluation and Analysis
# Difference in test MSE between the two models; a positive value means
# Model 2 (selected features) has the lower error.
mse_improvement = mse_1 - mse_2
print('MSE Improvement:', mse_improvement)


FileNotFoundError: ignored