In [3]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Read the raw weather observations into a DataFrame and preview the first rows
data = pd.read_csv(filepath_or_buffer='data/Weather_Data.csv')
data.head()

Unnamed: 0,Date,Temp9am,Temp3pm,MinTemp,MaxTemp,Rainfall,RainToday,Evaporation,Sunshine,WindGustDir,...,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm
0,01-02-13,20.7,20.9,19.5,22.4,15.6,Yes,6.2,0.0,W,...,S,SSW,17,20,92,84,1017.6,1017.4,8,8
1,02-02-13,22.4,24.8,19.5,25.6,6.0,Yes,3.4,2.7,W,...,W,E,9,13,83,73,1017.9,1016.4,7,7
2,03-02-13,23.5,23.0,21.6,24.5,6.6,Yes,2.4,0.1,W,...,ESE,ESE,17,2,88,86,1016.7,1015.6,7,8
3,04-02-13,21.4,20.9,20.2,22.8,18.8,Yes,2.2,0.0,W,...,NNE,E,22,20,83,90,1014.2,1011.8,8,8
4,05-02-13,22.5,25.5,19.7,25.7,77.4,Yes,4.8,0.0,W,...,NNE,W,11,6,88,74,1008.3,1004.8,8,8


In [5]:
# Columns kept for the rest of the notebook: the date (used to derive the
# season) and the four weather measurements the suitability rules read.
selected_columns = [
    'Date',
    'MaxTemp',
    'MinTemp',
    'Sunshine',
    'Rainfall'
]
# Keep only the selected columns. The explicit .copy() makes `data` an
# independent frame, so later cells can add columns to it without pandas
# raising SettingWithCopyWarning.
data = data[selected_columns].copy()
data.head()

Unnamed: 0,Date,MaxTemp,MinTemp,Sunshine,Rainfall
0,01-02-13,22.4,19.5,0.0,15.6
1,02-02-13,25.6,19.5,2.7,6.0
2,03-02-13,24.5,21.6,0.1,6.6
3,04-02-13,22.8,20.2,0.0,18.8
4,05-02-13,25.7,19.7,0.0,77.4


In [6]:
# Replace the 'Date' column (parsed as day-month-two-digit-year) with a
# numeric 'Month' column. The frame is rebuilt with assign/drop rather than
# edited in place, so nothing is written into a frame that pandas may treat
# as a slice of the original data.
data = (
    data
    .assign(Month=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%y').dt.month)
    .drop(columns=['Date'])
)

data.head()

Unnamed: 0,MaxTemp,MinTemp,Sunshine,Rainfall,Month
0,22.4,19.5,0.0,15.6,2
1,25.6,19.5,2.7,6.0,2
2,24.5,21.6,0.1,6.6,2
3,22.8,20.2,0.0,18.8,2
4,25.7,19.7,0.0,77.4,2


In [7]:
# Map a calendar month number to its cropping season
def assign_season(month):
    """Return the season label for a calendar month number.

    Months 3-6 give 'pre-kharif', months 7-10 give 'kharif', and months
    11, 12, 1 and 2 give 'rabi'. The split is by whole month, so June
    counts entirely as pre-kharif. Any other value gives 'Unknown'.
    """
    season_months = (
        ('pre-kharif', (3, 4, 5, 6)),
        ('kharif', (7, 8, 9, 10)),
        ('rabi', (11, 12, 1, 2)),
    )
    for label, months in season_months:
        if month in months:
            return label
    # Fallback for values that are not a month number from 1 to 12
    return 'Unknown'

# Label every row with its season via assign_season, then drop the helper
# 'Month' column. The frame is rebuilt rather than mutated in place.
data = (
    data
    .assign(Season=data['Month'].apply(assign_season))
    .drop(columns=['Month'])
)

# Display the first few rows
data.head()

Unnamed: 0,MaxTemp,MinTemp,Sunshine,Rainfall,Season
0,22.4,19.5,0.0,15.6,rabi
1,25.6,19.5,2.7,6.0,rabi
2,24.5,21.6,0.1,6.6,rabi
3,22.8,20.2,0.0,18.8,rabi
4,25.7,19.7,0.0,77.4,rabi


In [8]:
# Per-row boolean masks, one for each daily suitability rule (bounds inclusive)
day_temp_condition = data['MaxTemp'].between(20, 36)
night_temp_condition = data['MinTemp'].between(20, 23)
sunshine_condition = data['Sunshine'].ge(6)
rainfall_condition = data['Rainfall'].between(2, 30)

# A row is suitable only when all four rules hold at once
suitable = (
    day_temp_condition
    & night_temp_condition
    & sunshine_condition
    & rainfall_condition
)
data['Suitability'] = np.where(suitable, 'suitable', 'unsuitable')

data.head()

Unnamed: 0,MaxTemp,MinTemp,Sunshine,Rainfall,Season,Suitability
0,22.4,19.5,0.0,15.6,rabi,unsuitable
1,25.6,19.5,2.7,6.0,rabi,unsuitable
2,24.5,21.6,0.1,6.6,rabi,unsuitable
3,22.8,20.2,0.0,18.8,rabi,unsuitable
4,25.7,19.7,0.0,77.4,rabi,unsuitable


In [9]:
# Show how many rows fall into each suitability class
data.loc[:, 'Suitability'].value_counts()

Suitability
unsuitable    3246
suitable        25
Name: count, dtype: int64

In [10]:
# Collect the prepared columns under the names used in the exported dataset
new_data = dict(
    season=data['Season'],
    average_day_temp=data['MaxTemp'],
    average_night_temp=data['MinTemp'],
    daily_sunshine_hours=data['Sunshine'],
    daily_rainfall=data['Rainfall'],
    suitability=data['Suitability'],
)

In [11]:
# Turn the mapping of renamed columns into a DataFrame and preview it
new_data = pd.DataFrame(data=new_data)
new_data.head()

Unnamed: 0,season,average_day_temp,average_night_temp,daily_sunshine_hours,daily_rainfall,suitability
0,rabi,22.4,19.5,0.0,15.6,unsuitable
1,rabi,25.6,19.5,2.7,6.0,unsuitable
2,rabi,24.5,21.6,0.1,6.6,unsuitable
3,rabi,22.8,20.2,0.0,18.8,unsuitable
4,rabi,25.7,19.7,0.0,77.4,unsuitable


In [12]:
# Write the labelled dataset to disk without the row index
new_data.to_csv(path_or_buf='data/Weather_Suitability_test.csv', index=False)