In [1]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

In [2]:
# Load the earthquake records from a CSV file in the working directory and preview the first rows.
df = pd.read_csv('database.csv')
df.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,Magnitude Error,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [None]:
# Summarise the loaded frame: column dtypes and non-null counts.
df.info()

#### ***Converting string date object to datetime object***

In [3]:
# Parse the text columns into timezone-aware (UTC) datetimes.
# NOTE(review): no explicit format= is passed, so pandas infers it; the sample
# rows look like MM/DD/YYYY — confirm that holds for every row.
df['Date'] = pd.to_datetime(df['Date'], utc=True)
# 'Time' holds a clock time only (e.g. 13:44:18), so the date part of the parsed
# value is not meaningful; only hour and minute are extracted from it later.
df['Time'] = pd.to_datetime(df['Time'], utc=True)
# df['Date'].info()
# df['Time'].info()

In [4]:
# 'Date' and 'Time' were converted to datetimes in the previous cell, so their
# .dt accessors can be read directly to break them into components.
date_parts = df['Date'].dt
time_parts = df['Time'].dt

df["Year"] = date_parts.year
df["Month"] = date_parts.month
df["Day"] = date_parts.day
df["Hour"] = time_parts.hour
df["Minute"] = time_parts.minute

# The source columns are redundant once their components exist.
df.drop(columns=['Date', 'Time'], inplace=True)
# df.head(2)

#### ***To Find unique values in the dataset***

In [None]:
def uniquevalues(columns, data=None):
    """Return the columns that hold exactly one distinct value.

    Parameters
    ----------
    columns : iterable of column labels to inspect.
    data : DataFrame, optional
        Frame to inspect. Defaults to the notebook-level ``df`` so existing
        calls keep working; pass a frame explicitly to avoid depending on
        global state.

    Returns
    -------
    list
        The labels from ``columns``, in their original order, whose
        ``nunique()`` is 1. ``nunique()`` ignores missing values, so a column
        with one real value plus NaNs is also reported.
    """
    frame = df if data is None else data
    return [column for column in columns if frame[column].nunique() == 1]


# NOTE(review): this list is only printed. `columnsToDrop` is reassigned by the
# missing-value check in a later cell before anything is dropped, so
# single-valued columns found here are never removed — confirm that is intended.
columnsToDrop = uniquevalues(list(df.columns))
print(columnsToDrop)

#### ***To Find missing values in the dataset***

In [9]:
def missingValuePercentage(columns, threshold_value, data=None):
    """Return the columns whose share of null values exceeds a threshold.

    Parameters
    ----------
    columns : iterable of column labels to inspect.
    threshold_value : number
        Percentage (0-100). A column is reported only when its null
        percentage is strictly greater than this value.
    data : DataFrame, optional
        Frame to inspect. Defaults to the notebook-level ``df`` so existing
        calls keep working; pass a frame explicitly to avoid depending on
        global state.

    Returns
    -------
    list
        The labels from ``columns``, in their original order, that exceed the
        threshold.
    """
    frame = df if data is None else data
    return [
        column
        for column in columns
        # isnull().mean() is the fraction of nulls; scale it to a percentage.
        if frame[column].isnull().mean() * 100 > threshold_value
    ]
# Collect the columns in which more than 70% of the values are null.
columnsToDrop = missingValuePercentage(list(df.columns), 70)
print(columnsToDrop)

['Depth Error', 'Magnitude Error', 'Magnitude Seismic Stations', 'Horizontal Distance', 'Horizontal Error']


In [None]:
# Preview the frame before the sparse columns are dropped.
df.head(2)

***Dropping the columns***

In [None]:
# Remove the columns flagged as mostly null. errors='ignore' keeps the cell
# re-runnable: on a second run the columns are already gone and a plain drop
# would raise KeyError.
df.drop(columns=columnsToDrop, inplace=True, errors='ignore')

#### ***Displaying Earthquakes on map based on the magnitude***

In [8]:
app = Dash(__name__)

# Radio buttons for the year to plot; their value is the callback's first input.
map_year_choice = dcc.RadioItems(
    id='x-axis',
    options=[1965, 1966, 2011, 2016],
    value=1965,
    inline=True,
)

# Radio buttons for the column used as the density weight; second callback input.
map_value_choice = dcc.RadioItems(
    id='y-axis',
    options=['Magnitude'],
    value='Magnitude',
    inline=True,
)

# Page: heading, the two labelled selectors, then the graph the callback fills in.
app.layout = html.Div([
    html.H4("Earth Quake Magnitude on Map"),
    html.P("x-axis:"),
    map_year_choice,
    html.P("y-axis:"),
    map_value_choice,
    dcc.Graph(id="graph"),
])


@app.callback(
    Output("graph", "figure"),
    Input("x-axis", "value"),
    Input("y-axis", "value"))
def generate_chart(x, y):
    """Build the density map for one year of earthquakes.

    Parameters
    ----------
    x : year selected in the 'x-axis' radio group; rows of ``df`` whose
        'Year' equals it are plotted.
    y : name of the ``df`` column used as the density weight (``z``).

    Returns
    -------
    The Plotly figure shown in the 'graph' component.
    """
    Maptitle = "EarthQuakes Worldwide with Magnitude"
    quakes_in_year = df[df['Year'] == x]
    # "open-street-map" needs no access token. The previous "stamen-terrain"
    # tiles are served by Stadia Maps and require an account token, which this
    # notebook never configures, so the base map would not load.
    fig = px.density_mapbox(quakes_in_year, lat='Latitude', lon='Longitude', z=y, radius=10,
                            center=dict(lat=0, lon=180), zoom=0,
                            mapbox_style="open-street-map", title=Maptitle)
    return fig

# Start the development server. `run` is the current entry point; the older
# `run_server` spelling is no longer available in recent Dash releases.
app.run(debug=True)

In [None]:
def nuniquie_Values(cols, data=None):
    """Print the number of distinct values in each of the given columns.

    Parameters
    ----------
    cols : iterable of column labels to report on.
    data : DataFrame, optional
        Frame to inspect. Defaults to the notebook-level ``df`` so existing
        calls keep working.

    Prints one line per column; returns nothing. ``nunique()`` does not count
    missing values.
    """
    frame = df if data is None else data
    for col in cols:
        total_uniqueVals = frame[col].nunique()
        print(f"{col} has total {total_uniqueVals} unique values")

# Report the number of distinct values in every object-dtype column.
cat_cols = df.select_dtypes(include=['object']).columns.tolist()
nuniquie_Values(cat_cols)

In [None]:
# Preview the current state of the frame.
df.head(2)

In [None]:
# Dropping the ID column (currently disabled).
# NOTE(review): with the drop commented out, 'ID' is still an object column when
# the categorical columns are one-hot encoded later — confirm that is intended.
#df.drop(columns=['ID'],inplace=True)
df.head(5)

In [6]:
app1 = Dash(__name__)

# Radio buttons for the grouping column of the box plot; first callback input.
box_group_choice = dcc.RadioItems(
    id='x-axis',
    options=['Type', 'Source', 'Year', 'Magnitude Type'],
    value='Type',
    inline=True,
)

# Radio buttons for the measured column of the box plot; second callback input.
box_value_choice = dcc.RadioItems(
    id='y-axis',
    options=['Magnitude'],
    value='Magnitude',
    inline=True,
)

# Page: heading, the two labelled selectors, then the graph the callback fills in.
app1.layout = html.Div([
    html.H4("Analysis of the magnitude value"),
    html.P("x-axis:"),
    box_group_choice,
    html.P("y-axis:"),
    box_value_choice,
    dcc.Graph(id="graph"),
])


@app1.callback(
    Output("graph", "figure"),
    Input("x-axis", "value"),
    Input("y-axis", "value"),
)
def generate_chart(x, y):
    """Return a box plot of ``df`` column ``y`` grouped by column ``x``.

    ``x`` and ``y`` are the values currently selected in the 'x-axis' and
    'y-axis' radio groups.
    """
    return px.box(df, x=x, y=y)


# Start the development server with `run`, the current entry point (the older
# `run_server` spelling is no longer available in recent Dash releases).
# An explicit port keeps this app from colliding with the map app started
# earlier, which uses Dash's default port 8050.
app1.run(debug=True, port=8051)

In [None]:
# Correlate numeric columns only: `df` still contains string columns at this
# point, and DataFrame.corr() raises on them in pandas 2.x.
fig = px.imshow(df.select_dtypes(include='number').corr(), text_auto=True, aspect="auto")
fig.show()

In [None]:
from sklearn.preprocessing import OneHotEncoder

def convertCat_to_Num(cols, data=None):
    """One-hot encode categorical columns, replacing each with indicator columns.

    Parameters
    ----------
    cols : iterable of column labels to encode.
    data : DataFrame, optional
        Frame to encode in place. Defaults to the notebook-level ``df`` so
        existing calls keep working.

    Returns
    -------
    DataFrame
        The frame that was encoded (the same object that was modified).

    Each indicator column is named ``"<source column>_<category>"``. Naming the
    columns by category alone made different source columns that share a
    category value write to the same column, so the later one silently
    overwrote the earlier one.
    """
    frame = df if data is None else data
    for col in cols:
        enc = OneHotEncoder()
        encoded = enc.fit_transform(frame[[col]])
        indicator_names = [f"{col}_{category}" for category in enc.categories_[0]]
        frame[indicator_names] = encoded.toarray()
        frame.drop(columns=col, inplace=True)
    return frame


cat_cols = df.select_dtypes(include=['object']).columns.tolist()
# Encode a copy so `df` keeps its original categorical columns: the Dash
# callbacks defined earlier still read 'Type', 'Source', ... from `df`.
# NOTE(review): 'ID' is still among the object columns (its drop is commented
# out in an earlier cell), so it is encoded into one column per distinct ID —
# confirm that is intended.
df1 = convertCat_to_Num(cat_cols, data=df.copy())
df1.head(2)

In [None]:
# Pop 'Magnitude' out of the frame and re-insert it at position 0 so the
# target is the first column.
df1.insert(0, 'Magnitude', df1.pop('Magnitude'))
df1.head(2)

In [None]:
# Cast every column label to str so the frame has uniformly typed column names.
df1.columns = df1.columns.astype(str)

In [None]:
# Column 0 is 'Magnitude' (moved to the front in an earlier cell): the target,
# kept as a one-column DataFrame.
target_feature = df1.iloc[:,0:1]
# Every remaining column is used as a model input.
input_feature = df1.iloc[:,1:]

In [None]:
from sklearn.preprocessing import StandardScaler

# Some input columns still contain missing values (only columns above the 70%
# null threshold were dropped). StandardScaler passes NaN through and the PCA
# step that follows rejects NaN, so the gaps are filled before scaling.
# NOTE(review): median imputation is a simple default chosen here — confirm it,
# or drop the sparse columns instead.
features_imputed = input_feature.fillna(input_feature.median())

scalar = StandardScaler()
# Keep the original column labels and index so the scaled frame stays traceable
# to the input features.
scaled_data = pd.DataFrame(
    scalar.fit_transform(features_imputed),
    columns=input_feature.columns,
    index=input_feature.index,
)
scaled_data.head()

In [None]:
from sklearn.decomposition import PCA
# Reduce the scaled features to 5 principal components.
# NOTE(review): the PCA (like the scaler before it) is fitted on all rows,
# before the train/test split further down — confirm this is acceptable.
pca = PCA(n_components=5)
df_pca = pca.fit_transform(X=scaled_data)

In [None]:
# Name the components PC1..PCn from the actual width of the PCA output so the
# labels stay in sync with n_components instead of being hard-coded to five.
data_pca = pd.DataFrame(df_pca, columns=[f'PC{i}' for i in range(1, df_pca.shape[1] + 1)])

In [None]:
# Attach the target back onto the component frame.
# NOTE(review): target_feature is a one-column DataFrame, so this assignment
# relies on pandas aligning the two frames by index — confirm the indexes match.
data_pca['Magnitude'] = target_feature

In [None]:
# Preview the components together with the target column.
data_pca.head(2)

In [None]:
# df1['Azimuthal Gap'].isnull().mean()
# df1['Depth Seismic Stations'].isnull().mean()
#df1.drop(columns=['Root Mean Square'], inplace=True)

In [None]:
import seaborn as sns
# Heatmap of the pairwise correlations between the components and 'Magnitude'.
sns.heatmap(data_pca.corr())

In [None]:
# The first five columns are the principal components (model inputs).
X = data_pca.iloc[:,0:5]
# The remaining column ('Magnitude') is the target, kept as a one-column DataFrame.
y = data_pca.iloc[:,5:]

In [None]:
from sklearn.model_selection import train_test_split

# Half of the rows go to the test set. The fixed random_state makes the split,
# and everything computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)

In [None]:
# Check the size of the held-out set (rows, columns).
X_test.shape

In [None]:
# from sklearn.linear_model import LinearRegression
# model = LinearRegression()
# model.fit(X_train, y_train)
# r_sq = model.score(X_train, y_train)
# print(f"coefficient of determination: {r_sq}")

In [None]:
# y_pred = model.predict(X_test)

In [None]:
# from sklearn.metrics import accuracy_score
# accuracy_score(y_test, y_pred)

In [None]:
# training a DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier

# 'Magnitude' is a continuous value, which a classifier cannot take as a target
# (scikit-learn rejects it as a "continuous" label type), so it is binned into
# ordered classes before fitting.
# NOTE(review): the bin edges are a choice made here — adjust them to whatever
# magnitude classes the analysis needs.
MAGNITUDE_BINS = [float('-inf'), 6.0, 7.0, 8.0, float('inf')]
MAGNITUDE_LABELS = ['below 6', '6 to 7', '7 to 8', '8 and above']


def magnitude_class(magnitudes):
    """Return the class label for each row of a one-column magnitude frame (or Series)."""
    values = pd.DataFrame(magnitudes).iloc[:, 0]
    # right=False makes every bin [low, high), so 6.0 lands in '6 to 7'.
    binned = pd.cut(values, bins=MAGNITUDE_BINS, labels=MAGNITUDE_LABELS, right=False)
    return binned.astype(str)


y_train_class = magnitude_class(y_train)
y_test_class = magnitude_class(y_test)

# random_state fixes the tree's tie-breaking so the fit is reproducible.
dtree_model = DecisionTreeClassifier(max_depth=2, random_state=42).fit(X_train, y_train_class)
dtree_predictions = dtree_model.predict(X_test)

# creating a confusion matrix (rows: true class, columns: predicted class,
# both in MAGNITUDE_LABELS order)
cm = confusion_matrix(y_test_class, dtree_predictions, labels=MAGNITUDE_LABELS)