In [None]:
import requests
import pandas as pd

def get_global_earthquakes(min_magnitude=3.5, limit=10000, save_path="global_earthquakes.csv"):
   
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    params = {
        "format": "geojson",
        "minmagnitude": min_magnitude,
        "limit": limit,
        "orderby": "time"
    }
    
    response = requests.get(url, params=params)
    if response.status_code != 200:
        print("‚ö†Ô∏è USGS API ÏöîÏ≤≠ Ïã§Ìå®! ÏÉÅÌÉú ÏΩîÎìú:", response.status_code)
        return pd.DataFrame()  

    data = response.json()

    earthquakes = []
    for feature in data.get("features", []):
        properties = feature["properties"]
        geometry = feature["geometry"]
        earthquakes.append({
            "time": properties["time"],
            "magnitude": properties["mag"],
            "place": properties["place"],
            "lat": geometry["coordinates"][1],
            "lon": geometry["coordinates"][0],
            "depth": geometry["coordinates"][2]
        })

    df = pd.DataFrame(earthquakes)

    # CSV ÌååÏùºÎ°ú Ï†ÄÏû•
    df.to_csv(save_path, index=False, encoding="utf-8")
    print(f" Îç∞Ïù¥ÌÑ∞Í∞Ä '{save_path}' ÌååÏùºÎ°ú Ï†ÄÏû•ÎêòÏóàÏäµÎãàÎã§!")

    return df


df_global = get_global_earthquakes()
print(df_global.head()) 


‚úÖ Îç∞Ïù¥ÌÑ∞Í∞Ä 'global_earthquakes.csv' ÌååÏùºÎ°ú Ï†ÄÏû•ÎêòÏóàÏäµÎãàÎã§!
            time  magnitude                                         place  \
0  1740837593979       5.00             22 km NE of Taytayan, Philippines   
1  1740835055757       4.40                                   Fiji region   
2  1740835025805       4.50                      21 km E of Shinj≈ç, Japan   
3  1740833885750       4.27            144 km NNE of Vieques, Puerto Rico   
4  1740831485700       3.55  123 km NNE of Punta Cana, Dominican Republic   

       lat       lon    depth  
0   7.8935  126.6352   34.140  
1 -21.0123 -178.5754  527.973  
2  38.7729  140.5520   10.000  
3  19.6466  -65.3375   25.000  
4  19.6238  -67.9941   37.000  


In [37]:
from sklearn.preprocessing import StandardScaler

df_global = df_global.dropna()  
X = df_global[['lat', 'lon', 'depth']]  
y = (df_global['magnitude'] > 4.0).astype(int)  



In [38]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [39]:
from sklearn.neighbors import KNeighborsClassifier

# KNN Î™®Îç∏ ÌïôÏäµ
knn = KNeighborsClassifier(n_neighbors=20,weights='distance')
knn.fit(X_scaled, y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# KNN Î™®Îç∏ ÌïôÏäµ
knn.fit(X_train, y_train)


y_pred = knn.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f" Î™®Îç∏ Ï†ïÌôïÎèÑ: {accuracy:.4f}")


print("\n Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):")
print(classification_report(y_test, y_pred))

print("\nÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):")
print(confusion_matrix(y_test, y_pred))


‚úÖ Î™®Îç∏ Ï†ïÌôïÎèÑ: 0.9218

üìä Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):
              precision    recall  f1-score   support

           0       0.76      0.46      0.58        28
           1       0.93      0.98      0.96       215

    accuracy                           0.92       243
   macro avg       0.85      0.72      0.77       243
weighted avg       0.91      0.92      0.91       243


üßÆ ÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):
[[ 13  15]
 [  4 211]]


In [29]:
import numpy as np

def predict_earthquake(lat, lon, depth=10.0):
    """
    ÏÉàÎ°úÏö¥ Ï¢åÌëúÏùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•†ÏùÑ KNNÏù¥ ÏûêÎèôÏúºÎ°ú ÏòàÏ∏°
    """
    input_df = pd.DataFrame([[lat, lon, depth]], columns=['lat', 'lon', 'depth'])
    input_scaled = scaler.transform(input_df)
    prob = knn.predict_proba(input_scaled)[0]  # ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•† (0~1)
    return round(prob[1] * 100, 2) if len(prob) > 1 else round(prob[0] * 100, 2)


In [None]:
lat_test, lon_test = 37.5, 127.0  # ÏÑúÏö∏
test_prob = predict_earthquake(lat_test, lon_test)
print(f"üìç {lat_test}, {lon_test}Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•†: {test_prob}%")

üìç 37.5, 127.0Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•†: 95.58%


In [None]:
print(" ÌõàÎ†® Îç∞Ïù¥ÌÑ∞ Í∞úÏàò:", df_global.shape[0])


‚úÖ ÌõàÎ†® Îç∞Ïù¥ÌÑ∞ Í∞úÏàò: 1211


In [None]:
print(" ÏßÄÏßÑ Î∞úÏÉù Ïó¨Î∂Ä ÎπÑÏú®:")
print(df_global['magnitude'].apply(lambda x: 1 if x > 4.5 else 0).value_counts(normalize=True))


‚úÖ ÏßÄÏßÑ Î∞úÏÉù Ïó¨Î∂Ä ÎπÑÏú®:
magnitude
0    0.656482
1    0.343518
Name: proportion, dtype: float64


In [None]:
from sklearn.utils import resample

#  Îç∞Ïù¥ÌÑ∞Î•º Î∂ÑÎ¶¨
df_small_quakes = df_global[df_global['magnitude'] < 4.5]  
df_large_quakes = df_global[df_global['magnitude'] >= 4.5]  

In [None]:

# Oversampling
if len(df_small_quakes) > 0:
    df_large_quakes_resampled = resample(df_large_quakes,
                                         replace=True,  # Î≥µÏ†ú ÌóàÏö©
                                         n_samples=len(df_small_quakes),  # ÏûëÏùÄ ÏßÄÏßÑÍ≥º Í∞úÏàò ÎßûÏ∂îÍ∏∞
                                         random_state=42)
else:
    df_large_quakes_resampled = df_large_quakes.copy()

In [None]:

df_balanced = pd.concat([df_small_quakes, df_large_quakes_resampled])


count    1354.000000
mean        4.489697
std         0.425741
min         3.500000
25%         4.200000
50%         4.480000
75%         4.700000
max         7.600000
Name: magnitude, dtype: float64


In [None]:
X = df_balanced[['lat', 'lon', 'depth']]
y = (df_balanced['magnitude'] >= 4.5).astype(int) 


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:

# KNN ÌïôÏäµ 
knn = KNeighborsClassifier(n_neighbors=500) 
knn.fit(X_scaled, y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

knn.fit(X_train, y_train)


y_pred = knn.predict(X_test)



‚úÖ Î™®Îç∏ Ï†ïÌôïÎèÑ: 0.5609

üìä Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):
              precision    recall  f1-score   support

           0       0.55      0.71      0.62       136
           1       0.58      0.41      0.48       135

    accuracy                           0.56       271
   macro avg       0.57      0.56      0.55       271
weighted avg       0.57      0.56      0.55       271


üßÆ ÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):
[[96 40]
 [79 56]]


In [None]:

accuracy = accuracy_score(y_test, y_pred)
print(f" Î™®Îç∏ Ï†ïÌôïÎèÑ: {accuracy:.4f}")


print("\n Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):")
print(classification_report(y_test, y_pred))


print("\nÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):")
print(confusion_matrix(y_test, y_pred))

In [None]:
test_prob = predict_earthquake(37.5, 127.0)
print(f" 37.5, 127.0Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•† (Í∞úÏÑ† ÌõÑ): {test_prob}%")


üìç 37.5, 127.0Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•† (Í∞úÏÑ† ÌõÑ): 43.6%


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest Î™®Îç∏ ÌïôÏäµ
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_scaled, y)

def predict_earthquake_rf(lat, lon, depth=10.0):
    input_df = pd.DataFrame([[lat, lon, depth]], columns=['lat', 'lon', 'depth'])
    input_scaled = scaler.transform(input_df)
    prob = rf.predict_proba(input_scaled)[0][1] 
    return round(prob * 100, 2)

# ÏÑúÏö∏
test_prob_rf = predict_earthquake_rf(37.5, 127.0)
print(f" 37.5, 127.0Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•† (Random Forest): {test_prob_rf}%")


üìç 37.5, 127.0Ïùò ÏßÄÏßÑ Î∞úÏÉù ÌôïÎ•† (Random Forest): 32.0%


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)


rf.fit(X_train, y_train)


y_pred_rf = rf.predict(X_test)


‚úÖ Random Forest Î™®Îç∏ Ï†ïÌôïÎèÑ: 0.8044

üìä Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):
              precision    recall  f1-score   support

           0       0.83      0.77      0.80       136
           1       0.78      0.84      0.81       135

    accuracy                           0.80       271
   macro avg       0.81      0.80      0.80       271
weighted avg       0.81      0.80      0.80       271


üßÆ ÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):
[[105  31]
 [ 22 113]]


In [None]:
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f" Random Forest Î™®Îç∏ Ï†ïÌôïÎèÑ: {accuracy_rf:.4f}")

print("\n Î∂ÑÎ•ò Î≥¥Í≥†ÏÑú (Classification Report):")
print(classification_report(y_test, y_pred_rf))


print("\nÌòºÎèô ÌñâÎ†¨ (Confusion Matrix):")
print(confusion_matrix(y_test, y_pred_rf))

In [None]:
import joblib

#  1. Î™®Îç∏ Ï†ÄÏû• (Random Forest)
joblib.dump(rf, "earthquake_model.joblib")

#  2. Ïä§ÏºÄÏùºÎü¨ Ï†ÄÏû• (Îç∞Ïù¥ÌÑ∞ Ï†ïÍ∑úÌôîÎ•º ÏúÑÌï¥ ÌïÑÏöî)
joblib.dump(scaler, "scaler.joblib")

print(" Î™®Îç∏Í≥º Ïä§ÏºÄÏùºÎü¨ Ï†ÄÏû• ÏôÑÎ£å (joblib ÏÇ¨Ïö©)")


‚úÖ Î™®Îç∏Í≥º Ïä§ÏºÄÏùºÎü¨ Ï†ÄÏû• ÏôÑÎ£å (joblib ÏÇ¨Ïö©)
