In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import datetime


In [None]:
# Load the raw weather observations from a CSV file located next to the notebook.
df = pd.read_csv("weather_data_copy.csv")

# Preview the first rows to sanity-check the parse.
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
df.info()


In [None]:
# Rename the columns positionally; this assignment requires the frame to have
# exactly four columns at this point.
df.columns = ['DATE', 'TEMP', 'HUMIDITY', 'RAINFALL']

# Parse dates; values that cannot be parsed become NaT and those rows are dropped.
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
# .copy() makes the filtered frame independent, so the column assignments
# below never act on a view of the unfiltered data.
df = df.dropna(subset=['DATE']).copy()

# Calendar features derived from the parsed date.
df['YEAR'] = df['DATE'].dt.year
df['MONTH'] = df['DATE'].dt.month

# Fill missing measurements with the column mean. Assign the result back
# instead of calling fillna(inplace=True) on a column selection: that chained
# form emits a FutureWarning in pandas 2.x and silently does nothing once
# Copy-on-Write is active.
for measurement in ('TEMP', 'HUMIDITY', 'RAINFALL'):
    df[measurement] = df[measurement].fillna(df[measurement].mean())


In [None]:
# Print descriptive statistics for the frame.
print(df.describe())

# Pairwise correlations between the three weather measurements, rendered as
# an annotated heatmap.
weather_corr = df[['TEMP', 'HUMIDITY', 'RAINFALL']].corr()
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(weather_corr, annot=True, cmap='coolwarm')
heatmap_ax.set_title("Correlation Heatmap")
plt.show()


In [None]:
# Line chart: mean temperature per year, with a marker at every year.
temp_trend = df['TEMP'].groupby(df['YEAR']).mean()
trend_ax = temp_trend.plot(marker='o', color='blue')
trend_ax.set_title("Average Temperature Over Years")
trend_ax.set_xlabel("Year")
trend_ax.set_ylabel("Temperature")
trend_ax.grid(True)
plt.show()


In [None]:
# Bar chart: rainfall summed over each year.
rainfall_by_year = df.groupby('YEAR')['RAINFALL'].agg('sum')
rain_ax = rainfall_by_year.plot.bar(color='skyblue')
rain_ax.set_title("Total Rainfall by Year")
rain_ax.set_xlabel("Year")
rain_ax.set_ylabel("Rainfall")
plt.show()


In [None]:
# Scatter plot: one point per row, humidity on the x-axis and temperature on the y-axis.
plt.figure(figsize=(8, 6))
scatter_ax = sns.scatterplot(x='HUMIDITY', y='TEMP', data=df)
scatter_ax.set_title("Temperature vs Humidity")
scatter_ax.set_xlabel("Humidity")
scatter_ax.set_ylabel("Temperature")
plt.show()

In [None]:
# Collapse the data to one row per year holding that year's mean temperature.
temp_df = df.groupby('YEAR', as_index=False)['TEMP'].mean()

# Feature matrix (kept two-dimensional) and regression target.
X = temp_df.loc[:, ['YEAR']]
y = temp_df.loc[:, 'TEMP']

# Hold out 20% of the yearly rows for evaluation; the fixed seed makes the
# shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a linear regression on the training years (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out years: mean squared error and its square root.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"Model MSE: {mse:.2f}")
print(f"Model RMSE: {rmse:.2f}")


In [None]:
# Build the prediction range: every year from the first observed one up to
# nine years past the last observed one (the range end, max + 10, is exclusive).
first_year = temp_df['YEAR'].min()
last_year = temp_df['YEAR'].max()
future_years = pd.DataFrame({'YEAR': list(range(first_year, last_year + 10))})
future_preds = model.predict(future_years)

# Overlay the fitted/forecast line on the observed yearly averages.
forecast_ax = plt.gca()
forecast_ax.plot(temp_df['YEAR'], temp_df['TEMP'], label='Actual', marker='o')
forecast_ax.plot(
    future_years['YEAR'],
    future_preds,
    label='Predicted',
    color='orange',
    linestyle='--',
)
forecast_ax.set_title("Temperature Trend Forecast")
forecast_ax.set_xlabel("Year")
forecast_ax.set_ylabel("Temperature")
forecast_ax.legend()
forecast_ax.grid(True)
plt.show()
