<a href="https://colab.research.google.com/github/jsdhami/Lightning-Analysis/blob/main/Lightning_ML_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Create Our Own Model for Predictions**

In [1]:
!pip install scikit-learn



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Read the flash20240519 CSV directly from the project's GitHub repository.
file_path = (
    'https://github.com/jsdhami/Lightning-Analysis/raw/refs/heads/main/AEM/Tribhuvan_University_-_TLN_flash_data_-_01012012_00_UTC_to_02062025_00_UTC_flash20240519.csv'
)
data = pd.read_csv(file_path)

# Keep the preview as the final expression so the notebook renders it as a table.
data.head(n=5)

Unnamed: 0,type,timestamp,latitude,longitude,peakcurrent,icheight,numbersensors,icmultiplicity,cgmultiplicity,starttime,endtime,duration,ullatitude,ullongitude,lrlatitude,lrlongitude
0,1,2024-05-18T23:59:46.020196438,26.543713,88.013611,5907,3785,7,3,0,2024-05-18T23:59:46.020196438,2024-05-18T23:59:46.466683865,0.446487,26.536683,26.548075,87.993874,88.017937
1,0,2024-05-18T23:59:48.425529957,26.725709,88.043728,-7255,0,7,0,1,2024-05-18T23:59:48.425529957,2024-05-18T23:59:48.425529957,0.0,26.725709,26.725709,88.043728,88.043728
2,0,2024-05-18T23:59:49.555409431,26.65043,88.076885,-21009,0,11,1,1,2024-05-18T23:59:49.455501795,2024-05-18T23:59:49.555409431,0.099908,26.65043,26.685673,88.018929,88.076885
3,0,2024-05-18T23:59:51.631084204,26.62312,88.080718,-22688,0,12,1,9,2024-05-18T23:59:51.631084204,2024-05-18T23:59:52.401532888,0.770449,26.5446,26.629116,88.051429,88.081592
4,0,2024-05-18T23:59:51.809715986,26.654232,87.983968,-2672,0,6,1,1,2024-05-18T23:59:51.809715986,2024-05-18T23:59:52.094818592,0.285103,26.654232,26.676188,87.948144,87.983968


In [4]:
# Data cleaning: the three timestamp columns are not used as features, so they
# are dropped outright (parsing them to datetime first would be wasted work).
# errors='ignore' keeps this cell re-runnable once `data` has already been trimmed.
TIME_COLUMNS = ['timestamp', 'starttime', 'endtime']
data = data.drop(columns=TIME_COLUMNS, errors='ignore')

# Define the features and target variable
X = data.drop(columns=['type'])
y = data['type']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and
# then re-used, unchanged, on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train a Random Forest classifier (100 trees, fixed seed).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
# NOTE(review): the recorded run reports an accuracy of 1.0. A perfect score
# often means a feature encodes the label (possibly icheight / icmultiplicity /
# cgmultiplicity here) -- confirm before trusting the model.

# Persist the classifier AND the scaler it was trained with. The model only
# understands standardized inputs, so both artefacts are needed to make
# predictions outside this kernel session.
RF_MODEL_PATH = 'lightning_rf_model.pkl'
RF_SCALER_PATH = 'lightning_rf_scaler.pkl'
joblib.dump(model, RF_MODEL_PATH)
joblib.dump(scaler, RF_SCALER_PATH)
print("Model training complete. The trained model is saved as 'lightning_rf_model.pkl'.")


def predict_lightning_type(new_data):
    """Predict the lightning type of new flash records with the saved Random Forest.

    Both the classifier and the scaler fitted alongside it are loaded from
    disk, so the result does not depend on whatever object the notebook-level
    name ``scaler`` is bound to when the function is called.

    Args:
        new_data: Feature table with the same columns, in the same order, as
            the ``X`` frame used for training.

    Returns:
        The array returned by the classifier's ``predict`` -- one label per
        input row.
    """
    # joblib.load unpickles arbitrary objects: only load files written by this notebook.
    rf_model = joblib.load(RF_MODEL_PATH)
    fitted_scaler = joblib.load(RF_SCALER_PATH)
    # Apply the training-time standardization before predicting.
    new_data_scaled = fitted_scaler.transform(new_data)
    return rf_model.predict(new_data_scaled)

Accuracy: 1.0
Model training complete. The trained model is saved as 'lightning_rf_model.pkl'.


In [5]:
# Demo: classify one hand-written flash record with the Random Forest helper.
sample_flash = {
    'latitude': 26.543713,
    'longitude': 88.013611,
    'peakcurrent': -2500,
    'icheight': 45454,
    'numbersensors': 7,
    'icmultiplicity': 0,
    'cgmultiplicity': 0,
    'duration': 0.2,
    'ullatitude': 26.536683,
    'ullongitude': 26.548075,
    'lrlatitude': 87.993874,
    'lrlongitude': 88.017937,
}
# Wrap every value in a one-element list so the frame has exactly one row.
new_data = pd.DataFrame({column: [value] for column, value in sample_flash.items()})

predictions = predict_lightning_type(new_data)
print("Predictions for new data:", predictions)

# Label key -- 1: IC lightning; 0: CG lightning; 40: CG detected by WWLLN

Predictions for new data: [1]


In [6]:
# Train a second, independent model (Logistic Regression) on another data file
# (flash20240619). This does not continue training the Random Forest above.
file_path2 = 'https://github.com/jsdhami/Lightning-Analysis/raw/refs/heads/main/AEM/Tribhuvan_University_-_TLN_flash_data_-_01012012_00_UTC_to_02062025_00_UTC_flash20240619.csv'
data2 = pd.read_csv(file_path2)

# Data cleaning: the timestamp columns are not used as features, so drop them
# directly instead of parsing them to datetime first.
data2 = data2.drop(columns=['timestamp', 'starttime', 'endtime'])

# Define the features and target variable
X2 = data2.drop(columns=['type'])
y2 = data2['type']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

# Standardize the features: fit on the training split only, re-use on the test split.
scaler2 = StandardScaler()
X2_train_scaled = scaler2.fit_transform(X2_train)
X2_test_scaled = scaler2.transform(X2_test)

# Create and train the Logistic Regression classifier.
logistic_model = LogisticRegression(max_iter=1000, random_state=42)
logistic_model.fit(X2_train_scaled, y2_train)

# Evaluate on the held-out rows.
y2_pred = logistic_model.predict(X2_test_scaled)
accuracy2 = accuracy_score(y2_test, y2_pred)
print(f'Logistic Regression Accuracy: {accuracy2}')
# NOTE(review): the recorded run reports an accuracy of 1.0, which may indicate
# that a feature encodes the label -- confirm before trusting the model.

# Save the trained model, plus the scaler it was fitted with: the model only
# understands standardized inputs, so both are needed to reuse it outside this
# kernel session.
joblib.dump(logistic_model, 'logistic_regression_model.pkl')
joblib.dump(scaler2, 'logistic_regression_scaler.pkl')
print("Logistic Regression model training complete. The trained model is saved as 'logistic_regression_model.pkl'.")

Logistic Regression Accuracy: 1.0
Logistic Regression model training complete. The trained model is saved as 'logistic_regression_model.pkl'.


In [7]:
# Prediction helper for the logistic regression model
def predict_new_data_logistic(new_data):
    """Predict lightning types for new flash records with the saved logistic model.

    The classifier is re-read from 'logistic_regression_model.pkl' on every
    call, while standardization uses the notebook-level ``scaler2`` object, so
    the cell that fits ``scaler2`` must already have run in the current kernel.

    Args:
        new_data: Feature table accepted by ``scaler2.transform``.

    Returns:
        The output of the loaded model's ``predict`` for the scaled rows.
    """
    clf = joblib.load('logistic_regression_model.pkl')
    # Scale with the training-time scaler, then classify in one step.
    return clf.predict(scaler2.transform(new_data))

In [9]:
# Demo: classify one hand-written flash record with the logistic regression helper.
logistic_sample = dict(
    latitude=26.543713,
    longitude=88.013611,
    peakcurrent=200,
    icheight=322,
    numbersensors=7,
    icmultiplicity=0,
    cgmultiplicity=0,
    duration=0.2,
    ullatitude=26.536683,
    ullongitude=26.548075,
    lrlatitude=87.993874,
    lrlongitude=88.017937,
)
# Wrap every value in a one-element list so the frame has exactly one row.
new_data = pd.DataFrame({field: [reading] for field, reading in logistic_sample.items()})

predictions = predict_new_data_logistic(new_data)
print("Predictions for new data:", predictions)

Predictions for new data: [40]
