__Imports__

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

__Reading data__

In [14]:
df = pd.read_csv('CO2_emission.csv')

for c in df.columns:
    if df[c].dtype=='object': 
        df[c] = df[c].fillna('N')
        lbl = LabelEncoder()
        lbl.fit(list(df[c].values))
        df[c] = lbl.transform(df[c].values)

target = 'CO2_Emissions'

X = df.drop(target, axis=1)
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MLPRegressor(hidden_layer_sizes=(10, ), activation='relu',learning_rate='constant', max_iter=1000, learning_rate_init=0.01)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)

scores = cross_val_score(model, X, y, cv=10)
print(f'Scores: {scores}')
print(f'Average score: {np.mean(scores)*100:.2f}%')

Mean squared error: 248.13231435538836
Scores: [0.92731259 0.91944105 0.84393591 0.56058309 0.93337275 0.9728968
 0.94290817 0.98089816 0.94984901 0.92918541]
Average score: 89.60%


In [None]:
plt.plot(df['Make'], df['CO2_Emissions'], 'r-*')
plt.xlabel('Make')
plt.ylabel('CO2_Emissions')
plt.title('Make vs CO2_Emissions')
plt.show()

In [None]:
plt.plot(y[:200], label='True values')
plt.plot(y_pred, label='Forecasted values')
plt.title('CO2 Emissions Forecasting')
plt.legend()
plt.show()

In [15]:
df_grouped = df.groupby('Model').mean()
X = df_grouped.drop('CO2_Emissions', axis=1)
y = df_grouped['CO2_Emissions']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = MLPRegressor(hidden_layer_sizes=(10, ), activation='relu',learning_rate='constant', max_iter=1000, learning_rate_init=0.01)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)
scores = cross_val_score(model, X, y, cv=10)
print(f'Scores: {scores}')
print(f'Average score: {np.mean(scores)*100:.2f}%')

Mean squared error: 149.05945886629647
Scores: [0.94308467 0.96742809 0.94815767 0.99089008 0.93609208 0.98600789
 0.96208139 0.98143176 0.8823432  0.97472574]
Average score: 95.72%


In [16]:
model = MLPRegressor(hidden_layer_sizes=(10, ), activation='relu',learning_rate='constant', max_iter=1000, learning_rate_init=0.1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)
scores = cross_val_score(model, X, y, cv=10)
print(f'Scores: {scores}')
print(f'Average score: {np.mean(scores)*100:.2f}%')

Mean squared error: 128.6285803062499
Scores: [0.93883498 0.98268384 0.95441123 0.97664861 0.86211584 0.91066848
 0.96544856 0.97073931 0.91070536 0.92652276]
Average score: 93.99%


In [17]:
model = MLPRegressor(hidden_layer_sizes=(10, ), activation='identity',learning_rate='constant', max_iter=1000, learning_rate_init=0.1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)
scores = cross_val_score(model, X, y, cv=10)
print(f'Scores: {scores}')
print(f'Average score: {np.mean(scores)*100:.2f}%')

Mean squared error: 120.92634777909745
Scores: [0.97363815 0.9804532  0.96640455 0.98284877 0.93277221 0.93012456
 0.95356274 0.98702085 0.89348668 0.98133334]
Average score: 95.82%


In [18]:
model = MLPRegressor(hidden_layer_sizes=(10, 10), activation='relu',learning_rate='constant', max_iter=1000, learning_rate_init=0.1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)

scores = cross_val_score(model, X, y, cv=10)
print(f'Scores: {scores}')
print(f'Average score: {np.mean(scores)*100:.2f}%')

Mean squared error: 581.7667327695366
Scores: [0.89917685 0.97803358 0.97379939 0.94503199 0.79527395 0.97129083
 0.95967073 0.86467471 0.90258173 0.85496838]
Average score: 91.45%


In [31]:
list = [5, 7, 15, 20]

def k_fold(model, X, y,list):
    scores = []
    for i in list:
        model = MLPRegressor(hidden_layer_sizes=(10, ), activation='relu',learning_rate='constant', max_iter=1000, learning_rate_init=0.1)
        score = cross_val_score(model, X, y, cv=i)
        scores.append(round(score.mean(), 4))
    return scores

results = k_fold(model, X, y, list)

print('K-Fold Results')
print('K\tScore')
for i in range(len(list)):
    print(f'{list[i]}\t{results[i]*100:.2f}%')

K-Fold Results
K	Score
5	96.66%
7	88.71%
15	94.77%
20	94.85%
