<a href="https://colab.research.google.com/github/oluwafemidiakhoa/Mindserach/blob/master/Earthmonitoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install requests geopandas shapely pandas rasterio matplotlib scikit-learn networkx folium


Collecting rasterio
  Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl.metadata (14 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl.metadata (3.4 kB)
Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl (21.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.10 snuggs-1.4.7


In [29]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import time
import folium

# Step 1: Data Ingestion and Integration
def fetch_earthquake_data():
    url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv'
    data = pd.read_csv(url)
    data['geometry'] = data.apply(lambda x: Point((x.longitude, x.latitude)), axis=1)
    gdf = gpd.GeoDataFrame(data, geometry='geometry')
    return gdf

def fetch_weather_data():
    data = {
        'latitude': [37.7749, 34.0522, 40.7128],
        'longitude': [-122.4194, -118.2437, -74.0060],
        'temperature': [15, 20, 22],
        'humidity': [80, 65, 70]
    }
    df = pd.DataFrame(data)
    df['geometry'] = df.apply(lambda x: Point((x.longitude, x.latitude)), axis=1)
    return gpd.GeoDataFrame(df, geometry='geometry')

earthquake_data = fetch_earthquake_data()
weather_data = fetch_weather_data()

def enrich_data(earthquake_gdf, weather_gdf):
    enriched_gdf = gpd.sjoin(earthquake_gdf, weather_gdf, how="left", predicate='intersects')
    return enriched_gdf

enriched_data = enrich_data(earthquake_data, weather_data)

# Step 2: Convert and Drop Columns
enriched_data['time'] = pd.to_datetime(enriched_data['time'], errors='coerce')
enriched_data['updated'] = pd.to_datetime(enriched_data['updated'], errors='coerce')

columns_to_drop = ['time', 'updated', 'magType', 'net', 'id', 'place', 'type', 'status', 'locationSource', 'magSource',
                   'geometry', 'index_right', 'latitude_right', 'longitude_right', 'temperature', 'humidity']
enriched_data = enriched_data.drop(columns=columns_to_drop)

# Step 3: Feature Preparation and Imputation of Missing Values
def prepare_features(gdf):
    features = gdf[['mag', 'depth', 'nst', 'gap', 'dmin', 'rms', 'horizontalError', 'depthError', 'magError', 'magNst']]

    # Impute missing values with the mean of each column
    imputer = SimpleImputer(strategy='mean')
    features_imputed = pd.DataFrame(imputer.fit_transform(features), columns=features.columns)

    # Add the target variable
    features_imputed['disaster'] = features_imputed['mag'].apply(lambda x: 1 if x >= 5.0 else 0)

    print(f"Number of samples after feature preparation: {len(features_imputed)}")
    return features_imputed

features = prepare_features(enriched_data)

# Step 4: Train-Test Split
X = features.drop(columns=['disaster'])
y = features['disaster']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Model Training
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Step 6: Model Evaluation
y_pred = model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Step 7: Ensure Imputation on Real-Time Data
# Impute missing values in enriched_data before using it in real-time monitoring
imputer = SimpleImputer(strategy='mean')
enriched_data_imputed = pd.DataFrame(imputer.fit_transform(enriched_data), columns=enriched_data.columns)

# Ensure the latitude and longitude columns exist for the real-time monitoring
enriched_data_imputed['latitude'] = earthquake_data['latitude']
enriched_data_imputed['longitude'] = earthquake_data['longitude']

# Real-Time Monitoring Simulation
def real_time_monitoring(model, incoming_data):
    for index, row in incoming_data.iterrows():
        features = pd.DataFrame([row[['mag', 'depth', 'nst', 'gap', 'dmin', 'rms', 'horizontalError', 'depthError', 'magError', 'magNst']]])
        prediction = model.predict(features)
        if prediction == 1:
            print(f"ALERT: Potential disaster detected at latitude {row['latitude']}, longitude {row['longitude']} with magnitude {row['mag']}")
        time.sleep(1)  # Simulating real-time delay

X_test_reset = X_test.reset_index(drop=True)
real_time_monitoring(model, enriched_data_imputed.loc[X_test_reset.index])

# Step 8: Visualization
enriched_data_imputed['prediction'] = model.predict(features.drop(columns=['disaster']))

def create_disaster_map(gdf):
    m = folium.Map(location=[37.7749, -122.4194], zoom_start=5)
    for _, row in gdf.iterrows():
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            color='red' if row['prediction'] == 1 else 'green',
            fill=True,
            fill_opacity=0.6,
        ).add_to(m)
    return m

disaster_map = create_disaster_map(enriched_data_imputed)
disaster_map.save("disaster_map.html")


Number of samples after feature preparation: 2114
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       629
           1       1.00      1.00      1.00         6

    accuracy                           1.00       635
   macro avg       1.00      1.00      1.00       635
weighted avg       1.00      1.00      1.00       635

Accuracy Score: 1.0
ALERT: Potential disaster detected at latitude -5.8323, longitude 147.132 with magnitude 5.1
ALERT: Potential disaster detected at latitude -20.7776, longitude -179.0997 with magnitude 5.0
ALERT: Potential disaster detected at latitude -4.5404, longitude 102.0507 with magnitude 5.0


In [49]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score
import time
import folium

# Step 1: Data Ingestion and Integration
def fetch_earthquake_data():
    url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv'
    data = pd.read_csv(url)
    data['geometry'] = data.apply(lambda x: Point((x.longitude, x.latitude)), axis=1)
    gdf = gpd.GeoDataFrame(data, geometry='geometry')
    return gdf

def fetch_weather_data():
    data = {
        'latitude': [37.7749, 34.0522, 40.7128],
        'longitude': [-122.4194, -118.2437, -74.0060],
        'temperature': [15, 20, 22],
        'humidity': [80, 65, 70]
    }
    df = pd.DataFrame(data)
    df['geometry'] = df.apply(lambda x: Point((x.longitude, x.latitude)), axis=1)
    return gpd.GeoDataFrame(df, geometry='geometry')

earthquake_data = fetch_earthquake_data()
weather_data = fetch_weather_data()

def enrich_data(earthquake_gdf, weather_gdf):
    enriched_gdf = gpd.sjoin(earthquake_gdf, weather_gdf, how="left", predicate='intersects')
    return enriched_gdf

enriched_data = enrich_data(earthquake_data, weather_data)

# Step 2: Convert and Drop Columns
enriched_data['time'] = pd.to_datetime(enriched_data['time'], errors='coerce')
enriched_data['updated'] = pd.to_datetime(enriched_data['updated'], errors='coerce')

columns_to_drop = ['time', 'updated', 'magType', 'net', 'id', 'place', 'type', 'status', 'locationSource', 'magSource',
                   'geometry', 'index_right', 'latitude_right', 'longitude_right', 'temperature', 'humidity']
enriched_data = enriched_data.drop(columns=columns_to_drop)

# Step 3: Feature Preparation (no imputation needed with HistGradientBoostingClassifier)
def prepare_features(gdf):
    features = gdf[['mag', 'depth', 'nst', 'gap', 'dmin', 'rms', 'horizontalError', 'depthError', 'magError', 'magNst']]

    # Scaling features
    scaler = MinMaxScaler()
    features_scaled = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

    # Add the target variable
    features_scaled['disaster'] = features['mag'].apply(lambda x: 1 if x >= 5.0 else 0)

    print(f"Number of samples after feature preparation: {len(features_scaled)}")
    return features_scaled

features = prepare_features(enriched_data)

# Step 4: Train-Test Split and Cross-Validation
X = features.drop(columns=['disaster'])
y = features['disaster']

# Using HistGradientBoostingClassifier (no need for imputation)
model = HistGradientBoostingClassifier(random_state=42)

# Cross-validation to evaluate model performance
cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(f"Cross-Validation Accuracy Scores: {cv_scores}")
print(f"Mean CV Accuracy: {cv_scores.mean()}")

# Train the model on the entire dataset
model.fit(X, y)

# Step 5: Real-Time Monitoring Simulation (Optimized)
def real_time_monitoring(model, incoming_data):
    batch_size = 10  # Process data in batches instead of one at a time for efficiency
    for start in range(0, len(incoming_data), batch_size):
        end = min(start + batch_size, len(incoming_data))
        batch = incoming_data.iloc[start:end]
        features = batch[['mag', 'depth', 'nst', 'gap', 'dmin', 'rms', 'horizontalError', 'depthError', 'magError', 'magNst']]
        predictions = model.predict(features)
        for i, prediction in enumerate(predictions):
            if prediction == 1:
                print(f"ALERT: Potential disaster detected at latitude {batch.iloc[i]['latitude']}, longitude {batch.iloc[i]['longitude']} with magnitude {batch.iloc[i]['mag']}")

# Ensure the latitude and longitude columns exist for the real-time monitoring
enriched_data['latitude'] = earthquake_data['latitude']
enriched_data['longitude'] = earthquake_data['longitude']

real_time_monitoring(model, enriched_data)

# Step 6: Visualization
enriched_data['prediction'] = model.predict(features.drop(columns=['disaster']))

def create_disaster_map(gdf):
    m = folium.Map(location=[37.7749, -122.4194], zoom_start=5)
    for _, row in gdf.iterrows():
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            color='red' if row['prediction'] == 1 else 'green',
            fill=True,
            fill_opacity=0.6,
        ).add_to(m)
    return m

disaster_map = create_disaster_map(enriched_data)
disaster_map.save("disaster_map.html")


Number of samples after feature preparation: 2112
Cross-Validation Accuracy Scores: [0.9929078  0.99054374 0.99763033 0.99763033 0.99526066]
Mean CV Accuracy: 0.9947945727314489
ALERT: Potential disaster detected at latitude 60.7426, longitude -152.0222 with magnitude 2.8
ALERT: Potential disaster detected at latitude 34.4043333, longitude -116.4828333 with magnitude 1.28
ALERT: Potential disaster detected at latitude 35.01133347, longitude -97.77292633 with magnitude 1.58
ALERT: Potential disaster detected at latitude 64.9659, longitude -147.4163 with magnitude 1.1
ALERT: Potential disaster detected at latitude 19.3810005187988, longitude -155.252502441406 with magnitude 1.95
ALERT: Potential disaster detected at latitude 14.7144, longitude 147.0954 with magnitude 4.8
ALERT: Potential disaster detected at latitude 32.7465, longitude -117.3988333 with magnitude 1.37
ALERT: Potential disaster detected at latitude 61.4587, longitude -140.4342 with magnitude 2.5
ALERT: Potential disaster 