In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Train several regressors to predict Temp_C from the other weather columns,
# score each one on a held-out 20% split, and pickle the one with the
# highest R2 to model.pkl.

# One seed shared by the split and by the randomized estimators, so the
# printed scores and the model written to disk are the same on every run.
RANDOM_STATE = 42

# Load the dataset
data = pd.read_csv('Dataset11-Weather-Data.csv')

# Encode categorical data
# factorize() returns (codes, uniques); only the integer codes are kept here.
# NOTE(review): the label -> code mapping is not saved with the model, so any
# code that loads model.pkl has to rebuild the same mapping for 'Weather'.
data['Weather_encoded'] = data['Weather'].factorize()[0]

# Define features and target
X = data[['Dew Point Temp_C', 'Rel Hum_%', 'Wind Speed_km/h', 'Visibility_km', 'Press_kPa', 'Weather_encoded']]
y = data['Temp_C']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Initialize models
# The tree-based estimators are seeded; left unseeded they can produce
# different scores (and a different pickled model) from run to run.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "SVR": SVR()
}

# Train, evaluate, and save the best model
# NOTE(review): the winner is chosen on the same split its score is reported
# on, so the winner's printed score is an optimistic estimate.
best_model = None
best_r2 = float('-inf')

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{model_name} - MSE: {mse:.2f}, R2: {r2:.2f}")

    # Strict '>' keeps the earliest model in `models` when two scores tie.
    if r2 > best_r2:
        best_model = model
        best_r2 = r2

# A NaN score never compares greater than -inf, so best_model can still be
# None here; fail loudly instead of pickling None and reporting success.
if best_model is None:
    raise RuntimeError("No model produced a comparable R2 score; nothing to save")

# Save the best model
with open('model.pkl', 'wb') as file:
    pickle.dump(best_model, file)

print(f"Best model ({type(best_model).__name__}) saved as model.pkl")


Linear Regression - MSE: 0.66, R2: 1.00
Decision Tree - MSE: 0.21, R2: 1.00
Random Forest - MSE: 0.06, R2: 1.00
SVR - MSE: 0.20, R2: 1.00
Best model (RandomForestRegressor) saved as model.pkl
