In [None]:
### BUILDING A MACHINE LEARNING MODEL WHICH WILL PREDICT WHETHER OR NOT IT WILL RAIN TOMORROW BY LEARNING FROM PAST DATA ###

### SETUP/LOAD LIBRARIES ###

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### STEP 1 ### LOAD THE DATASET ###

# Read the weather observations from disk, then print the table's
# (rows, columns) shape and its first five rows as a quick sanity check.
df = pd.read_csv('weather.csv')
print('size of data frame =', df.shape)
print(df.head(5))

### STEP 2 ### REMOVE NULL VALUES FROM THE DATA FRAME###

# Discard every row that has a missing value in at least one column and
# report the shape that remains.
df = df.dropna(axis='index', how='any')
print('size after remoing null values =', df.shape)

### STEP 3 ### HANDLE NON-NUMERIC DATA AND NORMALIZE ###

# Set the target column aside before filtering: the numeric-only selection
# below keeps just the numeric columns of the frame.
y = df[['RainTomorrow']]
df = df.select_dtypes(include=np.number)

# X is every remaining numeric column except 'RISK_MM'.
X = df.drop('RISK_MM', axis=1)

# Min-max scale each column, keeping the original row index and column
# names so the scaled frame lines up with X. Rows 4..9 are printed as a
# spot check.
scaler = preprocessing.MinMaxScaler()
X_scaled = pd.DataFrame(
    scaler.fit_transform(X),
    index=X.index,
    columns=X.columns,
)
print(X_scaled.iloc[4:10])

### STEP 4 ### FEATURE SELECTION ###

# Score every scaled feature against the target with the chi-squared
# statistic and keep the five best-scoring columns.
selector = SelectKBest(score_func=chi2, k=5).fit(X_scaled, y)
X_new = selector.transform(X_scaled)

# Show the names of the columns the selector retained.
print(X_scaled.columns[selector.get_support()])

### STEP 5 ### PUT IMPORTANT FEATURES IN THE DATA FRAME ###

# Narrow the (unscaled) numeric frame to the five chosen feature columns,
# attach the target next to them, and rebuild X as the features only.
df = df[[
    'Sunshine',
    'Humidity3pm',
    'Pressure3pm',
    'Cloud9am',
    'Cloud3pm',
]]
df['RainTomorrow'] = y
X = df.drop('RainTomorrow', axis=1)

### STEP 6 ### DATA SPLITTING ###

# Hold out 25% of the rows for testing. The split is seeded so that the
# same rows land in each partition on every run; without a seed the
# reported accuracy changes from run to run even though the classifier
# itself is created with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
clf_logreg = LogisticRegression(random_state=0)


### STEP 7 ### BUILDING THE MODEL USING THE TRAINING DATA SET ###

# y_train is a one-column DataFrame; flatten it to the 1-D array of labels
# that fit() expects.
clf_logreg.fit(X_train, y_train.values.ravel())


### STEP 8 ### EVALUATE THE MODEL USING THE TEST DATA SET ###

# Predict labels for the held-out test rows.
y_pred = clf_logreg.predict(X_test)


### STEP 9 ### CALCULATE ACCURACY ###

# Fraction of test rows whose predicted label equals the true label.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

print('Accuracy using Logistic Regression:', score)

### STEP 10 ###TEST YOUR OWN WEATHER DATA ###

# Read the five feature values the classifier was trained on.
print("\nTry your own weather info to see if it might rain tomorrow!")
sunshine = float(input("Enter Sunshine (hours): "))
humidity3pm = float(input("Enter Humidity at 3pm (%): "))
pressure3pm = float(input("Enter Pressure at 3pm (hPa): "))
cloud9am = float(input("Enter Cloud at 9am (oktas 0–8): "))
cloud3pm = float(input("Enter Cloud at 3pm (oktas 0–8): "))

# Make a single-row DataFrame with the new input. The column names and
# their order match the feature columns used to fit the model.
student_input = pd.DataFrame(
    [[sunshine, humidity3pm, pressure3pm, cloud9am, cloud3pm]],
    columns=['Sunshine', 'Humidity3pm', 'Pressure3pm', 'Cloud9am', 'Cloud3pm'],
)

# Ask the model to make a prediction.
# The input is passed as-is on purpose: the classifier was fitted on the
# raw (unscaled) feature columns, and `scaler` was fitted on the full set
# of numeric columns, so it can neither accept this five-column row nor
# produce values on the scale the model was trained with.
prediction = clf_logreg.predict(student_input)

# Show the prediction result.
# The model returns the same label values it was trained on, so accept
# both a textual 'Yes' label and a numeric 1 as "rain".
print("\n🤖 Prediction: Will it rain tomorrow?")
print("Yes! 🌧️" if prediction[0] in ('Yes', 1) else "No! ☀️")

size of data frame = (366, 22)
   MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine WindGustDir  \
0      8.0     24.3       0.0          3.4       6.3          NW   
1     14.0     26.9       3.6          4.4       9.7         ENE   
2     13.7     23.4       3.6          5.8       3.3          NW   
3     13.3     15.5      39.8          7.2       9.1          NW   
4      7.6     16.1       2.8          5.6      10.6         SSE   

   WindGustSpeed WindDir9am WindDir3pm  WindSpeed9am  ...  Humidity3pm  \
0           30.0         SW         NW           6.0  ...           29   
1           39.0          E          W           4.0  ...           36   
2           85.0          N        NNE           6.0  ...           69   
3           54.0        WNW          W          30.0  ...           56   
4           50.0        SSE        ESE          20.0  ...           49   

   Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  Temp3pm  RainToday  \
0       1019.7       1015.0        