In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Load the dataset.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The saved output of this cell shows a warning
# raised on this read_csv line - presumably the mixed-dtype DtypeWarning; confirm.
data = pd.read_csv('/content/dataset.csv', low_memory=False)

# Data Preprocessing
# Convert 'Rain Today' and 'rain' into binary values
data['Rain Today'] = data['Rain Today'].map({'Yes': 1, 'No': 0})
# read_csv parses a column holding only True/False as booleans, in which case
# the string keys below would match nothing and the whole column would become
# NaN. Converting to str first handles both the boolean and the text form.
data['rain'] = data['rain'].astype(str).map({'True': 1, 'False': 0})

# Selecting features for prediction
features = ['date', 'location', 'valid_time_gmt', 'month', 'time', '(Day/Night)',
            'temperature', 'condition', 'dewPoint', 'heat_index', 'humidity %',
            'pressure', 'visibility', 'windchill', 'wdirection', 'wdir_cardinal',
            'gust', 'wspd', 'uv_desc', 'feels_like', 'uv_index', 'clds']

# Target variable
target = 'Rain Today'

# Drop rows with missing target values (this also removes rows whose label was
# neither 'Yes' nor 'No', because the mapping above turned those into NaN)
data = data.dropna(subset=[target])

# Split data into features and target
X = data[features]
y = data[target]

# Preprocessing pipeline for numerical and categorical features
numeric_features = ['date', 'valid_time_gmt', 'temperature', 'dewPoint', 'heat_index', 'humidity %',
                    'pressure', 'visibility', 'windchill', 'wdirection', 'gust', 'wspd',
                    'feels_like', 'uv_index']
categorical_features = ['location', 'month', 'time', '(Day/Night)', 'condition', 'wdir_cardinal',
                        'uv_desc', 'clds']

# Define the preprocessing steps:
# numeric gaps are filled with the column mean, categorical gaps with the most
# frequent value before one-hot encoding.
numeric_transformer = SimpleImputer(strategy='mean')
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # handle_unknown='ignore' lets prediction inputs contain categories that
    # were not present in the training split.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing for numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Create a RandomForest classifier pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
# NOTE(review): the saved output of this cell reports 1.00. A perfect score is
# unusual and worth checking for target leakage among the features - confirm.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Function to make a prediction for a given date and location
def predict_rain(date, location):
    """Predict "Rain" or "No Rain" for one date and location.

    Only ``date``, ``location`` and the month derived from the date come from
    the caller. Every other feature is filled with a fixed placeholder value,
    so the answer is only illustrative until real observations are supplied.

    Parameters
    ----------
    date : str or int
        Calendar date in ``YYYYMMDD`` form, e.g. ``'20241001'`` or ``20241001``.
    location : str
        Location label, passed unchanged as the ``location`` feature.

    Returns
    -------
    str
        ``"Rain"`` when the module-level ``model`` predicts class 1 for the
        assembled row, otherwise ``"No Rain"``.

    Raises
    ------
    ValueError
        If ``date`` is not a valid ``YYYYMMDD`` date.
    """
    # Normalise to an integer so the 'date' feature is numeric, which is how
    # the preprocessing pipeline treats that column.
    date_number = int(date)
    parsed_date = pd.to_datetime(str(date_number), format='%Y%m%d')

    # Full English month name ('January', 'October', ...), independent of the
    # process locale. Assumes the dataset's 'month' column uses the same
    # spelling as the 'January' placeholder this replaces - confirm.
    month_name = parsed_date.month_name()

    # Create a data frame for input
    input_data = pd.DataFrame({
        'date': [date_number],
        'location': [location],
        'valid_time_gmt': [0],  # Assuming default for new input, this would be adjusted for real-time inputs
        'month': [month_name],  # Derived from the date above
        'time': ['12:00 PM'],  # Default time, adjust as needed
        '(Day/Night)': ['Day'],  # Default value, adjust as needed
        'temperature': [70.0],  # Example temperature, adjust for real input
        'condition': ['Fair'],  # Default condition
        'dewPoint': [50.0],  # Example dew point
        'heat_index': [72.0],  # Example heat index
        'humidity %': [50.0],  # Example humidity
        'pressure': [1013.0],  # Example pressure
        'visibility': [10.0],  # Example visibility
        'windchill': [0],  # Example windchill
        'wdirection': [0],  # Example wind direction
        'wdir_cardinal': ['N'],  # Example wind direction cardinal
        'gust': [0],  # Example gust speed
        'wspd': [5],  # Example wind speed
        'uv_desc': ['Low'],  # Example UV description
        'feels_like': [70.0],  # Example feels like temperature
        'uv_index': [0],  # Example UV index
        'clds': ['CLR']  # Example cloudiness
    })

    # Make prediction
    rain_prediction = model.predict(input_data)

    return "Rain" if rain_prediction[0] == 1 else "No Rain"

# Demo call: 1 October 2024 (YYYYMMDD) at the Nagpur/Sonegaon station.
# As the last expression of the cell, the returned label is displayed.
predict_rain(date='20241001', location='Nagpur/Sonegaon')


  data = pd.read_csv('/content/dataset.csv')


Model Accuracy: 1.00


'No Rain'

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import random

# Load dataset.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The saved output of this cell shows a warning
# raised on this read_csv line - presumably the mixed-dtype DtypeWarning; confirm.
data = pd.read_csv('/content/Nagpur_dataset - foo2014-2022.csv', low_memory=False)

# Rows without a label cannot be used for training. Drop them before the
# blanket fill below, which would otherwise turn a missing 'Rain Today' into 0
# and have it counted as a "No" by the target conversion further down.
data = data.dropna(subset=['Rain Today'])

# Handle the remaining missing values (reassigned rather than mutated in place)
data = data.fillna(0)

# Months that custom_predict treats as the rainy season
rainy_season_months = ['June', 'July', 'August', 'September']

# Preprocessing: Convert categorical columns to numerical using pd.get_dummies
categorical_columns = ['location', 'month', 'time', '(Day/Night)', 'condition', 'wdir_cardinal', 'uv_desc', 'clds']
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Define features and target
X = data.drop(columns=['Rain Today', 'date', 'valid_time_gmt', 'rain'])  # Drop irrelevant or target columns
y = data['Rain Today'].eq('Yes').astype(int)  # 'Yes' -> 1, anything else -> 0

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict function with custom logic for June to September
def custom_predict(date, location, model, data):
    """Return a season-based rain label for ``date``.

    The month is taken from ``date`` (``YYYYMMDD``, as an int or a string of
    digits). For months listed in the module-level ``rainy_season_months`` the
    label is drawn with ``random.choice`` from ``'Rain'`` and ``'No Rain'``;
    for every other month it is always ``'No Rain'``.

    ``location``, ``model`` and ``data`` are accepted but not used here.
    """
    date_digits = str(int(date))
    timestamp = pd.to_datetime(date_digits, format='%Y%m%d')
    month_name = timestamp.strftime('%B')

    # Outside the rainy season the answer is fixed.
    if month_name not in rainy_season_months:
        return 'No Rain'

    # Inside the rainy season the label is a random pick between the two.
    return random.choice(['Rain', 'No Rain'])

# Score the fitted forest on the held-out split. The seasonal rule in
# custom_predict plays no part in this number.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# `accuracy` is computed but not printed; inspect the variable to see it.

# Demo of the seasonal rule for a single date and location
date_input = 20240926  # YYYYMMDD
location_input = "Nagpur/Sonegaon"
rainfall_prediction = custom_predict(
    date=date_input,
    location=location_input,
    model=model,
    data=data,
)
print(f"Rainfall prediction for {date_input} at {location_input}: {rainfall_prediction}")


  data = pd.read_csv('/content/Nagpur_dataset - foo2014-2022.csv')


Rainfall prediction for 20240926 at Nagpur/Sonegaon: Rain


In [None]:
import pickle
filename = 'model.pkl'

# Save the trained model. The context manager closes the file, and therefore
# flushes the write, even if pickling fails part-way, so the read below never
# sees a half-written file.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# some time later...

# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source such as the one written above.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
