In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
# Load the training and test tables from the Kaggle input directory
# (train first, then test).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/ai-201-b-mse-1-ai-d/train.csv',
        '/kaggle/input/ai-201-b-mse-1-ai-d/test.csv',
    )
)
# Notebook-style preview of the first training rows; the result is not stored.
train.head()

# Filling Missing Values
# Each column is filled with its most frequent value (mode). The fill values
# are learned from the training data and reused for the test data so both
# tables are imputed the same way. A column with no observed values has an
# empty mode and is left untouched instead of failing on `mode()[0]`.
fill_values = {}
for col in train.columns:
    observed_modes = train[col].mode()
    if not observed_modes.empty:
        fill_values[col] = observed_modes.iloc[0]
train = train.fillna(fill_values)

# Test columns without a training fill value (column absent from train, or
# entirely missing there) fall back to the test column's own mode.
test_fill_values = {}
for col in test.columns:
    if col in fill_values:
        test_fill_values[col] = fill_values[col]
        continue
    observed_modes = test[col].mode()
    if not observed_modes.empty:
        test_fill_values[col] = observed_modes.iloc[0]
test = test.fillna(test_fill_values)

# Drop ID Column (if present)
# Keep the test ids aside for the submission file; `get` yields None when
# the test table has no 'id' column.
id_column = test.get('id')
# errors='ignore' makes the drop a no-op when the column is absent.
train = train.drop(columns=['id'], errors='ignore')
test = test.drop(columns=['id'], errors='ignore')

# Features and Target Column
# The target is 'Energy_Output_Difference'; every other remaining column is a feature.
y = train['Energy_Output_Difference']
x = train.drop(columns=['Energy_Output_Difference'])

# Splitting into Training and Validation
# 20% of the rows are held out for validation; the seed is fixed so the
# split is repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Models
# Candidate regressors keyed by display name. Entries are added in the same
# order as before, since dict order drives iteration over `models`.
xgb_params = dict(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=0,
    verbosity=0,
)

models = {}
models['Random Forest'] = RandomForestRegressor(n_estimators=200, random_state=0)
models['Decision Tree'] = DecisionTreeRegressor(random_state=0)
models['Linear Regression'] = LinearRegression()
models['XGBoost'] = XGBRegressor(**xgb_params)

# Fit, Predict and Evaluate the best model
# Train every candidate on the training split and record its R2 on the
# validation split.
r2_scores = {}
for model_name, regressor in models.items():
    regressor.fit(x_train, y_train)
    val_r2 = r2_score(y_val, regressor.predict(x_val))
    r2_scores[model_name] = val_r2
    print(f'{model_name} R2 Score: {val_r2:.4f}')

# The winner is the candidate with the highest validation R2 (first one wins a tie).
best_model_name = max(r2_scores, key=lambda candidate: r2_scores[candidate])
best_model = models[best_model_name]
print("Best Model Name: ", best_model_name)

# On Testing Data
# Select the test features by the training feature names so the column set
# and order match what the model was fitted on. Extra test columns are
# dropped; a feature missing from test raises KeyError here.
x_test = test[x.columns].copy()
y_pred_test = best_model.predict(x_test)

# Generating submission file
# id_column is None when the test table had no 'id' column. Fall back to the
# test row index so the 'id' column is not written out empty.
# NOTE(review): confirm the row index is an acceptable id for the submission.
submission_ids = test.index if id_column is None else id_column
submission = pd.DataFrame({
    'id': submission_ids,
    'Energy_Output_Difference': y_pred_test,
})
submission.to_csv("submission1.csv", index=False)
# The reported score is the validation R2 recorded for the selected model.
print(f"The best model is {best_model_name} with R2 Score = {r2_scores[best_model_name]:.4f}")
print("submission1.csv generated using best model!")
# Notebook-style preview of the first submission rows; the result is not stored.
submission.head()

