In [1]:
import csv
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the earthquake records; the relative path is resolved against the
# current working directory.
data = pd.read_csv(filepath_or_buffer='Earthquakes.csv')

# Preview the first five rows to sanity-check the parse.
data.head(n=5)

Unnamed: 0,latitude,longitude,depth,mag,gap,dmin,rms,horizontalError,depthError,magError,meter
0,5.1033,125.3179,44.32,4.3,131,1.97,0.7,5.0,9.3,0.126,18.0
1,-8.8535,117.1527,116.88,4.9,59,2.991,1.06,6.5,7.3,0.091,38.0
2,4.7512,96.2239,10.0,5.3,148,0.862,1.29,6.6,1.8,0.073,18.0
3,-10.1202,118.7274,10.0,4.5,86,3.764,0.95,7.6,1.9,0.084,42.0
4,1.4361,127.0142,120.78,4.5,105,0.748,0.76,8.0,6.6,0.089,37.0


In [3]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(1369, 11)

In [4]:
def get_csv_column_names(file_path):
    """Return the header row of a CSV file as a list of column names.

    Only the first record is parsed; the remaining rows are never read.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to inspect.

    Returns
    -------
    list of str
        The fields of the first record, or an empty list when the file
        contains no records at all.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded inside quoted
    # fields are translated before the parser sees them.
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Default to [] so an empty file does not leak StopIteration, which
        # would silently end any iteration this helper is called from.
        return next(reader, [])

# Read the header labels straight from the raw file and display them.
column_names = get_csv_column_names(file_path='Earthquakes.csv')
column_names

['latitude',
 'longitude',
 'depth',
 'mag',
 'gap',
 'dmin',
 'rms',
 'horizontalError',
 'depthError',
 'magError',
 'meter']

In [5]:
# Print each column's dtype and non-null count to spot missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1369 entries, 0 to 1368
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   latitude         1369 non-null   float64
 1   longitude        1369 non-null   float64
 2   depth            1369 non-null   float64
 3   mag              1369 non-null   float64
 4   gap              1369 non-null   int64  
 5   dmin             1369 non-null   float64
 6   rms              1369 non-null   float64
 7   horizontalError  1369 non-null   float64
 8   depthError       1369 non-null   float64
 9   magError         1368 non-null   float64
 10  meter            1368 non-null   float64
dtypes: float64(10), int64(1)
memory usage: 117.8 KB


In [6]:
# Replace missing numeric entries with the mean of their column.
# NOTE(review): the imputer is fitted on every row and every column, so the
# last column (later used as the regression target) is mean-imputed as well,
# and the means are computed before any train/test split. Consider dropping
# rows with a missing target and fitting on the training rows only.
imp = SimpleImputer(missing_values = np.nan, strategy = 'mean')

# Take the labels from the frame being transformed rather than from a second,
# independent parse of the CSV header, so column names and row index are
# guaranteed to match the imputed values.
data = pd.DataFrame(imp.fit_transform(data), columns = data.columns, index = data.index)

In [7]:
# The last column is the regression target; everything before it is a feature.
target_column = data.columns[-1]

inputData = data.drop(columns = [target_column])
outputData = data[target_column]

# Preview the feature matrix.
inputData.head()

Unnamed: 0,latitude,longitude,depth,mag,gap,dmin,rms,horizontalError,depthError,magError
0,5.1033,125.3179,44.32,4.3,131.0,1.97,0.7,5.0,9.3,0.126
1,-8.8535,117.1527,116.88,4.9,59.0,2.991,1.06,6.5,7.3,0.091
2,4.7512,96.2239,10.0,5.3,148.0,0.862,1.29,6.6,1.8,0.073
3,-10.1202,118.7274,10.0,4.5,86.0,3.764,0.95,7.6,1.9,0.084
4,1.4361,127.0142,120.78,4.5,105.0,0.748,0.76,8.0,6.6,0.089


In [8]:
# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test-set statistics influence the training features.
# Fitting on the training rows only (e.g. inside a Pipeline) would avoid that.
scaler = StandardScaler()

# fit_transform returns a bare array; re-attach the feature names and row
# index so the scaled frame stays readable and aligned with outputData.
inputData = pd.DataFrame(
    scaler.fit_transform(inputData),
    columns = inputData.columns,
    index = inputData.index,
)

In [9]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    inputData,
    outputData,
    test_size = 0.3,
    random_state = 42,
)

In [10]:
# RBF-kernel support vector regressor. fit() returns the estimator itself,
# so construction and training are chained into one statement.
svr = SVR(kernel = 'rbf', C = 100, epsilon = 0.1).fit(xtrain, ytrain)

# Predictions for the held-out rows.
ypred = svr.predict(xtest)

In [11]:
# Score the held-out predictions: mean squared error and coefficient of
# determination.
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

# Report both metrics, R2 first.
for label, value in (("R2  : ", r2), ("MSE : ", mse)):
    print(label, value)

R2  :  0.8805627441254434
MSE :  75.93710865545778
