# In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import numpy as np
from sklearn.impute import SimpleImputer

# Read the raw temperature records from the CSV file
file_path = "C:\\Users\\Hiremath\\OneDrive\\Desktop\\temperature.csv"
data = pd.read_csv(file_path)

# Calendar feature engineering, expressed as a single method chain:
#   1. parse the 'Date' strings using the explicit day-month-year format,
#   2. derive the numeric 'DayOfYear' and 'Year' features from the parsed dates,
#   3. discard the original 'Date' column together with 'station', which is
#      not used as a predictor.
data = (
    data
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y'))
    .assign(
        DayOfYear=lambda frame: frame['Date'].dt.dayofyear,
        Year=lambda frame: frame['Date'].dt.year,
    )
    .drop(columns=['Date', 'station'])
)

# Prepare the two regression targets (next-day max and min temperature).
# Infinite values are mapped to NaN first so the mean imputer below treats
# them as missing. `replace` is used WITHOUT `inplace=True`: mutating a Series
# that was selected out of `data` in place is fragile (it may or may not write
# through to the parent frame depending on the pandas version / Copy-on-Write
# mode); building a new Series is unambiguous.
y_max = data['Next_Tmax'].replace([np.inf, -np.inf], np.nan)
y_min = data['Next_Tmin'].replace([np.inf, -np.inf], np.nan)

# NOTE(review): the fill value is the mean over ALL rows, i.e. it is computed
# before the train/test split, so held-out targets influence the imputed
# values. Confirm this is acceptable, or drop rows with missing targets.
imputer = SimpleImputer(strategy='mean')

# SimpleImputer requires 2-D input and returns an (n, 1) array; ravel()
# flattens it back to the 1-D target vector that regressors and metrics
# conventionally expect. The imputer is re-fitted for each target, so after
# this block it holds the statistics of 'Next_Tmin'.
y_max = imputer.fit_transform(y_max.to_numpy().reshape(-1, 1)).ravel()
y_min = imputer.fit_transform(y_min.to_numpy().reshape(-1, 1)).ravel()

# Feature matrix: every column of `data` except the two target columns
X = data.drop(columns=['Next_Tmax', 'Next_Tmin'])

# Hold out 20% of the rows for testing. Passing the features and both targets
# in one call splits them together; the fixed random_state makes the split
# reproducible.
(
    X_train, X_test,
    y_max_train, y_max_test,
    y_min_train, y_min_test,
) = train_test_split(X, y_max, y_min, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same
# transformation to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Max-temperature model: a 100-tree XGBoost regressor with squared-error
# objective, fitted on the scaled training features and then used to predict
# the held-out rows. (`fit` returns the fitted estimator, so construction and
# training can be chained.)
max_temp_model = xgb.XGBRegressor(
    n_estimators=100,
    objective='reg:squarederror',
).fit(X_train_scaled, y_max_train)
max_temp_predictions = max_temp_model.predict(X_test_scaled)

# Min-temperature model: same configuration, trained on the min-temperature
# target and evaluated on the same held-out rows
min_temp_model = xgb.XGBRegressor(
    n_estimators=100,
    objective='reg:squarederror',
).fit(X_train_scaled, y_min_train)
min_temp_predictions = min_temp_model.predict(X_test_scaled)

# Evaluate both models with the root-mean-squared error (RMSE) on the
# held-out test rows.
# The `squared=False` flag of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6 (it now raises TypeError), so the RMSE
# is computed as the square root of the plain MSE, which works on every
# scikit-learn version. float() turns the NumPy scalar into a built-in float
# so the printed value has the same plain decimal form everywhere.
max_temp_rmse = float(np.sqrt(mean_squared_error(y_max_test, max_temp_predictions)))
min_temp_rmse = float(np.sqrt(mean_squared_error(y_min_test, min_temp_predictions)))

print(f"Max Temperature RMSE: {max_temp_rmse}")
print(f"Min Temperature RMSE: {min_temp_rmse}")


# Output:
# Max Temperature RMSE: 0.8097671189400242
# Min Temperature RMSE: 0.6323476349458291
