# DATA CLEANING PROCESS

In [1]:
"""Importing the neccessary python Libraries"""
import numpy as np
import pandas as pd
from sklearn import tree
pd.set_option('display.max_columns', None)

"""Creating the rainfall data path"""
csv_path = 'C:\\Users\\DELL\\OneDrive\\Documents\\TASK 2\\Rainfall data 2002-2024.csv'

"""Reading the rainfall data in CSV format (Visual crossing)"""
csv_df = pd.read_csv(csv_path)

"""Checking the number of Rows and Columns in the CSV file"""
csv_df.shape

"""Checking for the number of duplicates values in the CSV file across all columns"""
csv_df.duplicated().sum()

"""Deleting the observed duplicate values"""
csv_df = csv_df.drop_duplicates().reset_index(drop=True)

"""Converting the datetime column to datetime datatype from object datatype"""
csv_df['datetime'] = pd.to_datetime(csv_df['datetime'])

"""Fill all Nan row using the ffill method"""
csv_df = csv_df.fillna(method='ffill')

"""Creating dayoftheweek, month and season columns from the datetime column"""
csv_df['dayofweek'] = csv_df['datetime'].dt.day_name()  
csv_df['month'] = csv_df['datetime'].dt.month  
csv_df['season'] = (csv_df['datetime'].dt.month % 12 + 3) // 3

"""Deleting columns not needed for building the model"""
csv_df = csv_df.drop(['feelslikemax', 'precipprob', 'feelslikemin', 'snow', 'snowdepth', 'datetime', 'name', 
                      'severerisk', 'sunrise', 'sunset', 'description', 'icon', 'stations', 'feelslike', 'preciptype',
                      'conditions', 'windspeedmax', 'windspeedmin'], axis=1).reset_index(drop=True)

"""Encoding the dayofweek categorical data to numeric data"""
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
csv_df['dayofweek'] = le.fit_transform(csv_df['dayofweek'])

"""Creating ohe_df dataframe from csv_df"""
ohe_df = csv_df

# DAILY FLOOD PREDICTION MODEL

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE

# Split the data into features and target
X = ohe_df.drop('flood', axis=1)
y = ohe_df['flood']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform SMOTE to handle class imbalance
sm = BorderlineSMOTE(k_neighbors=5, random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

scaler = StandardScaler()
X_train_res_scaled = scaler.fit_transform(X_train_res)
X_test_scaled = scaler.transform(X_test)

# Initialize Logistic Regression model with a different solver and increased max_iter
lr = LogisticRegression(solver='liblinear', max_iter=1000, random_state=42)
lr.fit(X_train_res_scaled, y_train_res)

# Make predictions
predictions = lr.predict(X_test_scaled)

# Print classification report
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

         0.0       1.00      0.75      0.86      1677
         1.0       0.02      0.60      0.04        15

    accuracy                           0.75      1692
   macro avg       0.51      0.68      0.45      1692
weighted avg       0.99      0.75      0.85      1692



# TESTING OF THE MODEL BASED OF VISUAL CROSSING 15 DAYS RANGE WEATHER API

In [26]:
import pandas as pd
import urllib.request
import json
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Example API request to retrieve data
try: 
    ResultBytes = urllib.request.urlopen("https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/lagos%20nigeria?unitGroup=us&include=days&key=8BBTUCBUPQQ7HKQSCENEWPND5&contentType=json")
  
    # Parse the results as JSON
    jsonData = json.load(ResultBytes)
    
    # Extract relevant data from JSON response
    data_points = jsonData['days']
    
    # Convert JSON data to DataFrame
    api_data = pd.DataFrame(data_points)
    
    # Ensure datetime column is parsed correctly
    api_data['datetime'] = pd.to_datetime(api_data['datetime'])
    
    # Extract day of the week, month, and season from datetime
    api_data['dayofweek'] = api_data['datetime'].dt.day_name()  # Day of the week (Monday, Tuesday, etc.)
    api_data['month'] = api_data['datetime'].dt.month  # Month of the year (1-12)
    api_data['season'] = (api_data['datetime'].dt.month % 12 + 3) // 3  # Determine season
    
    # Use LabelEncoder to transform 'dayofweek' to numeric
    le = LabelEncoder()
    api_data['dayofweek'] = le.fit_transform(api_data['dayofweek'])
    
    # Define features used in your model
    features1 = ['tempmax', 'tempmin', 'temp', 'dew', 'humidity', 'precip', 'precipcover', 'windgust', 'windspeed', 'winddir', 
                'pressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvindex', 'moonphase', 
                'dayofweek', 'month', 'season']
    
    # Filter and preprocess API data to match your model's features
    X_api = api_data[features1] # Select relevant features
    
    # Scale the API data using the same scaler instance as used on training data
    scaler = StandardScaler()
    X_api_scaled = scaler.fit_transform(X_api)  # Use 'scaler' from your training step
    
    # Use the trained logistic regression model to predict on API data
    predictions = lr.predict(X_api_scaled)
    
    # Print or display predictions
    print("Predictions on API data:")
    print(predictions)
    
except urllib.error.HTTPError as e:
    ErrorInfo= e.read().decode() 
    print('Error code: ', e.code, ErrorInfo)
except urllib.error.URLError as e:
    ErrorInfo= e.read().decode() 
    print('Error code: ', e.code, ErrorInfo)
except KeyError as e:
    print(f"KeyError: {e}")

Predictions on API data:
[0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1.]
