In [None]:
import pandas as pd

from google.colab import drive
drive.mount('/content/gdrive')

# Location of the monthly rainfall CSV.
# NOTE(review): this path is under /content, not under the Drive mount point
# /content/gdrive -- confirm the file is uploaded to the Colab session storage.
RAINFALL_CSV_PATH = '/content/mumbai-monthly-rains.csv'

# Load the dataset into a DataFrame. Previously `rainfall_data` held only the
# path string, so `.head()` (and every later cell) failed with AttributeError.
rainfall_data = pd.read_csv(RAINFALL_CSV_PATH)

# Display the first few rows of the dataset to understand its structure
rainfall_data.head()


Mounted at /content/gdrive


AttributeError: 'str' object has no attribute 'head'

In [None]:
# Print the summary statistics followed by the column index, one report per line.
print(rainfall_data.describe(), rainfall_data.columns, sep='\n')


In [None]:
# Check for missing values: prints the count of nulls per column.
print(rainfall_data.isnull().sum())

# Replace missing values with the mean of their column.
# numeric_only=True restricts the means to numeric columns; without it,
# pandas 2.x raises TypeError as soon as the frame has a non-numeric column.
# Columns without a computed mean are left untouched by fillna.
rainfall_data = rainfall_data.fillna(rainfall_data.mean(numeric_only=True))


In [None]:
# Month columns: everything between the first and the last column.
# Assumes 'Year' is the first column and 'Total' is the last column.
months = rainfall_data.columns[1:-1]

# Cast 'Year' to integer so it can be used as the model feature.
rainfall_data['Year'] = rainfall_data['Year'].astype(int)

# Feature matrix: a single-column DataFrame holding the year.
X = rainfall_data[['Year']]



In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np

# One fitted regression per month column, keyed by the column name.
models = {}

for month in months:
    # Target: this month's rainfall column; feature: the year in X.
    y = rainfall_data[month]
    # fit() returns the estimator itself, so construction and fitting can be chained.
    model = LinearRegression().fit(X, y)
    models[month] = model


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Share of the most recent years held out for evaluation.
TEST_FRACTION = 0.2

# The notebook never created X_test / y_test, and `model` was only the last
# month's estimator left over from the training loop. Build a chronological
# hold-out here instead and score every month separately.
history = rainfall_data.sort_values('Year')
# r2_score is undefined for fewer than two samples, so hold out at least two rows.
n_test = max(2, int(round(len(history) * TEST_FRACTION)))
if len(history) - n_test < 2:
    raise ValueError(
        f'Need at least {n_test + 2} rows to evaluate, got {len(history)}'
    )
train_rows = history.iloc[:-n_test]
test_rows = history.iloc[-n_test:]

evaluation_records = []
for month in months:
    # Fit a separate model on the training years only, so the held-out years
    # are unseen. These models are for scoring; the entries in `models`
    # (fit on all years) are not modified.
    eval_model = LinearRegression().fit(train_rows[['Year']], train_rows[month])
    y_test = test_rows[month]
    y_pred = eval_model.predict(test_rows[['Year']])
    evaluation_records.append({
        'Month': month,
        'Mean Squared Error': mean_squared_error(y_test, y_pred),
        'R^2 Score': r2_score(y_test, y_pred),
    })

# One row per month; shown as the cell output.
evaluation_df = pd.DataFrame(evaluation_records)
evaluation_df


In [None]:
# Predict the rainfall of every month for the target year.
TARGET_YEAR = 2022

# The models were fit on a DataFrame with a 'Year' column, so the query is
# built with the same column name. Passing a bare NumPy array instead makes
# scikit-learn warn that X does not have valid feature names.
year_2022 = pd.DataFrame({'Year': [TARGET_YEAR]})

predictions_2022 = {}
for month, model in models.items():
    predicted_rainfall = model.predict(year_2022)
    # Store a plain Python float (cleaner printing than a NumPy scalar).
    # A linear trend can extrapolate below zero; rainfall cannot be negative,
    # so the prediction is clamped at 0.
    predictions_2022[month] = max(0.0, float(predicted_rainfall[0]))

print(f"Predicted Monthly Rainfall for {TARGET_YEAR}: {predictions_2022}")


In [None]:
import matplotlib.pyplot as plt

# Two-column frame (month name, predicted value) built from the predictions dict.
predictions_df = pd.DataFrame({
    'Month': list(predictions_2022.keys()),
    'Predicted Rainfall': list(predictions_2022.values()),
})

# Make 'Month' an ordered categorical following the order of `months`, then
# sort on it so the points are plotted in that order.
predictions_df['Month'] = pd.Categorical(
    predictions_df['Month'], categories=months, ordered=True
)
predictions_df = predictions_df.sort_values('Month')

# Line plot of the predicted rainfall, one marker per month.
plt.figure(figsize=(10, 6))
plt.plot(
    predictions_df['Month'],
    predictions_df['Predicted Rainfall'],
    color='b',
    linestyle='-',
    marker='o',
)
plt.xticks(rotation=45)
plt.grid(True)
plt.xlabel('Month')
plt.ylabel('Rainfall (mm)')
plt.title('Predicted Monthly Rainfall for 2022')
plt.show()
