<a href="https://colab.research.google.com/github/likeshd/time_series_models_libraries/blob/main/co2_emission_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import numpy as np

In [None]:
# Load the sample CO2 emission workbook from the Colab filesystem, then
# preview the first five rows to confirm the columns were read as expected.
data = pd.read_excel(io=r"/content/CO2_Emission_Sample_Data.xlsx")
data.head()

Unnamed: 0,year,month,vehicle,co2
0,2015,1,Car,3.28
1,2015,1,Truck,1.06
2,2015,1,Bus,1.13
3,2015,1,Motorcycle,8.02
4,2015,2,Car,4.86


In [None]:
# Separate predictors from the response: the model inputs are the calendar
# columns (year, month) and the target is the co2 column. The vehicle column
# is not included in the feature matrix.
X, y = data[['year', 'month']], data['co2']
print(X, y)

     year  month
0    2015      1
1    2015      1
2    2015      1
3    2015      1
4    2015      2
..    ...    ...
475  2024     11
476  2024     12
477  2024     12
478  2024     12
479  2024     12

[480 rows x 2 columns] 0      3.28
1      1.06
2      1.13
3      8.02
4      4.86
       ... 
475    3.51
476    9.62
477    7.61
478    3.76
479    7.01
Name: co2, Length: 480, dtype: float64


In [None]:

# Hold out 20% of the rows for evaluation. The seed is fixed so the same rows
# land in the test split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [None]:
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV
# Baseline: ordinary least-squares regression of co2 on (year, month).
linear_reg_model = LinearRegression().fit(X_train, y_train)

# Score the baseline on the held-out split with mean absolute error.
linear_reg_pred = linear_reg_model.predict(X_test)
linear_reg_mae = mean_absolute_error(y_true=y_test, y_pred=linear_reg_pred)
print("Linear Regression Mean Absolute Error:", linear_reg_mae)


Linear Regression Mean Absolute Error: 2.041955486756347


In [None]:
# Lasso regression; LassoCV picks its regularisation strength by
# cross-validation on the training split.
lasso_reg_model = LassoCV().fit(X_train, y_train)

# Score the lasso model on the held-out split with mean absolute error.
lasso_reg_pred = lasso_reg_model.predict(X_test)
lasso_reg_mae = mean_absolute_error(y_true=y_test, y_pred=lasso_reg_pred)
print("Lasso Regression Mean Absolute Error:", lasso_reg_mae)

Lasso Regression Mean Absolute Error: 2.0219184027777777


In [None]:
# Ridge regression; RidgeCV picks its regularisation strength by
# cross-validation on the training split.
ridge_reg_model = RidgeCV().fit(X_train, y_train)

# Score the ridge model on the held-out split with mean absolute error.
ridge_reg_pred = ridge_reg_model.predict(X_test)
ridge_reg_mae = mean_absolute_error(y_true=y_test, y_pred=ridge_reg_pred)
print("Ridge Regression Mean Absolute Error:", ridge_reg_mae)


Ridge Regression Mean Absolute Error: 2.0419054442249194


In [None]:
from sklearn.tree import DecisionTreeRegressor
# Fit a decision tree regressor on the training split.
# Note: despite its name, `decision_tree_pred` holds the fitted estimator;
# the predictions live in `dc_reg_pred`.
decision_tree_pred = DecisionTreeRegressor().fit(X_train, y_train)

# Score the tree on the held-out split with mean absolute error.
dc_reg_pred = decision_tree_pred.predict(X_test)
dc_reg_mae = mean_absolute_error(y_true=y_test, y_pred=dc_reg_pred)
print("decision tree Regression Mean Absolute Error:", dc_reg_mae)


decision tree Regression Mean Absolute Error: 2.2295833333333337


In [None]:
from sklearn.ensemble import RandomForestRegressor
# Fit a random forest regressor on the training split. The forest is seeded
# with the same value as the train/test split: without a fixed random_state
# the bootstrap samples differ between runs and the reported MAE is not
# reproducible.
# Note: despite its name, `random_forest_pred` holds the fitted estimator;
# the predictions live in `rf_reg_pred`.
random_forest_pred = RandomForestRegressor(random_state=42)
random_forest_pred.fit(X_train, y_train)

# Score the forest on the held-out split with mean absolute error.
rf_reg_pred = random_forest_pred.predict(X_test)
rf_reg_mae = mean_absolute_error(y_test, rf_reg_pred)
print("random forest Regression Mean Absolute Error:", rf_reg_mae)

random forest Regression Mean Absolute Error: 1.6648338001668472


In [None]:
from xgboost import XGBRegressor
# Fit a gradient-boosted tree regressor on the training split. The estimator
# is kept in its own name so it is still available after this cell (e.g. for
# forecasting); `xgb_pred` only ever holds the test-set predictions.
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)

# Score the booster on the held-out split with mean absolute error.
xgb_pred = xgb_model.predict(X_test)
xgb_mae = mean_absolute_error(y_test, xgb_pred)
print(" xgboost Mean Absolute Error:", xgb_mae)

 xgboost Mean Absolute Error: 1.6724975325663884


### conclusion
Linear Regression Mean Absolute Error: 2.041955486756347

Lasso Regression Mean Absolute Error: 2.0219184027777777

Ridge Regression Mean Absolute Error: 2.041905444224919

decision tree Regression Mean Absolute Error: 2.229583

random forest Regression Mean Absolute Error: 2.1859308

xgboost Mean Absolute Error: 2.22606883486112

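From the above comparison, Lasso regression has the lowest mean absolute error among the values listed here, with Ridge and linear regression close behind, so a Lasso or Ridge regression model can be used for forecasting. Note that the random forest and XGBoost values listed here differ from the outputs recorded in the cells above (about 1.66 and 1.67, respectively); they should be re-checked before this ranking is relied on.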





In [None]:
def predict_upcoming_CO2(model, years, months, vehicle,
                         allowed_vehicles=('Car', 'Truck', 'Bus', 'Motorcycle')):
    """Predict CO2 for future (year, month, vehicle) rows with a fitted model.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``. It is called with a two-column
        DataFrame holding ``year`` and ``month``.
    years, months, vehicle : array-like
        Row-aligned values of equal length; element ``i`` of each describes
        one row to predict.
    allowed_vehicles : iterable of str, optional
        Vehicle labels to keep. Rows whose ``vehicle`` is not in this
        collection are dropped before predicting. Defaults to the four
        labels ``'Car'``, ``'Truck'``, ``'Bus'`` and ``'Motorcycle'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``month``, ``vehicle`` and ``CO2_prediction``, one
        row per kept input row. Kept rows retain their original positional
        index labels.

    Notes
    -----
    Only ``year`` and ``month`` are passed to ``model.predict``; ``vehicle``
    is used for filtering and labelling the output only.
    """
    upcoming_data = pd.DataFrame({'year': years, 'month': months, 'vehicle': vehicle})

    # Take an explicit copy of the filtered rows: adding a column to a
    # boolean-filtered slice can raise SettingWithCopyWarning.
    keep_row = upcoming_data['vehicle'].isin(list(allowed_vehicles))
    upcoming_data = upcoming_data[keep_row].copy()

    # Nothing left to predict: return the empty frame with the expected
    # column instead of handing zero rows to the estimator.
    if upcoming_data.empty:
        upcoming_data['CO2_prediction'] = pd.Series(dtype='float64')
        return upcoming_data

    X_pred = upcoming_data[['year', 'month']]
    upcoming_data['CO2_prediction'] = model.predict(X_pred)
    return upcoming_data

In [None]:
upcoming_years = [2025, 2026, 2027]
all_months = np.arange(1, 13)
all_vehicles = ['Car', 'Truck', 'Bus', 'Motorcycle']

# Build every (year, month, vehicle) combination as an explicit Cartesian
# product, so the grid is complete for any number of years or vehicles and
# the rows come out ordered by year, then month, then vehicle.
forecast_grid = pd.MultiIndex.from_product(
    [upcoming_years, all_months, all_vehicles],
    names=['year', 'month', 'vehicle'],
).to_frame(index=False)

# NOTE: the regression models in this notebook are fitted on year and month
# only, so for a given year and month the prediction is the same for every
# vehicle type.
upcoming_predictions = predict_upcoming_CO2(
    ridge_reg_model,
    forecast_grid['year'].to_numpy(),
    forecast_grid['month'].to_numpy(),
    forecast_grid['vehicle'].to_numpy(),
)
upcoming_predictions

Unnamed: 0,year,month,vehicle,CO2_prediction
0,2025,1,Car,5.747594
1,2026,1,Truck,5.760426
2,2027,1,Bus,5.773258
3,2025,1,Motorcycle,5.747594
4,2026,1,Car,5.760426
...,...,...,...,...
139,2026,12,Motorcycle,5.119728
140,2027,12,Car,5.132560
141,2025,12,Truck,5.106896
142,2026,12,Bus,5.119728
