In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from datetime import datetime
from sklearn.cluster import KMeans
import seaborn as sns

In [2]:
# Load the raw catalogue; the text export uses ';' as its field separator.
Earthquakes1 = pd.read_csv("earthquakes1.txt", sep=';')

# Save a comma-separated copy of the same table, without the row index.
Earthquakes1.to_csv('earthquakes1.csv', index=None)

In [4]:
# Size of the raw table as (rows, columns).
Earthquakes1.shape

(483270, 11)

In [5]:
# Preview the first rows of the raw table.
Earthquakes1.head()

Unnamed: 0,XCoord,YCoord,mag,magType,place,latitude,longitude,kmDepth,depth,eventTime,eventType
0,6798700000,3829000000,4600000,m,"8 km ENE of Denov, Uzbekistan",38290001,67987000,45700001,45700001,2007-12-21 23:49:31,Earthquake
1,8818000000,4239200000,4100000,m,"102 km SW of Turpan, China",42391998,88180000,36299999,36299999,2007-11-18 11:35:20,Earthquake
2,16625200000,-1098800000,4700000,m,"57 km ESE of Lata, Solomon Islands",-10988000,166251999,183300003,183300003,2007-12-21 22:58:26,Earthquake
3,2967600000,3896700000,4000000,m,Western Turkey,38966999,29676001,37799999,37799999,2007-11-18 10:48:28,Earthquake
4,17837000000,-3882000000,4800000,ml,"36 km ESE of Gisborne, New Zealand",-38820000,178369995,34000000,34000000,2007-12-21 22:35:11,Earthquake


In [6]:
# List the column names, one per line.
column_names = [str(name) for name in Earthquakes1.columns]
if column_names:
    print('\n'.join(column_names))

XCoord
YCoord
mag
magType
place
latitude
longitude
kmDepth
depth
eventTime
eventType


In [7]:
# Keep only the two columns used from here on: the event timestamp and the
# event category. The other nine columns (coordinates, magnitude, place,
# depth) are discarded.
# Selecting by name instead of dropping positions 0-8 in place makes the cell
# safe to re-run: a second positional drop would raise IndexError because only
# two columns would be left.
Earthquakes1 = Earthquakes1[['eventTime', 'eventType']].copy()

In [8]:
# Check which columns remain after the drop.
Earthquakes1.head()

Unnamed: 0,eventTime,eventType
0,2007-12-21 23:49:31,Earthquake
1,2007-11-18 11:35:20,Earthquake
2,2007-12-21 22:58:26,Earthquake
3,2007-11-18 10:48:28,Earthquake
4,2007-12-21 22:35:11,Earthquake


In [9]:
# Column dtypes before eventTime is converted to datetime.
Earthquakes1.dtypes

eventTime    object
eventType    object
dtype: object

In [10]:
# Parse the eventTime strings into datetime64 values, replacing the column.
Earthquakes1 = Earthquakes1.assign(
    eventTime=pd.to_datetime(Earthquakes1['eventTime'])
)

In [11]:
# Confirm that eventTime now has a datetime dtype.
Earthquakes1.dtypes

eventTime    datetime64[ns]
eventType            object
dtype: object

In [12]:
# Preview the table after the datetime conversion.
Earthquakes1.head()

Unnamed: 0,eventTime,eventType
0,2007-12-21 23:49:31,Earthquake
1,2007-11-18 11:35:20,Earthquake
2,2007-12-21 22:58:26,Earthquake
3,2007-11-18 10:48:28,Earthquake
4,2007-12-21 22:35:11,Earthquake


In [13]:
# Drop the time-of-day component: each eventTime becomes a datetime.date.
from datetime import datetime
# pd.to_datetime leaves a datetime64 column unchanged and re-parses a column
# of date objects, so the cell can be re-run safely. `.dt.date` is the
# vectorised equivalent of apply(lambda x: x.date()).
Earthquakes1['eventTime'] = pd.to_datetime(Earthquakes1['eventTime']).dt.date

In [14]:
# Preview the table after removing the time-of-day component.
Earthquakes1.head()

Unnamed: 0,eventTime,eventType
0,2007-12-21,Earthquake
1,2007-11-18,Earthquake
2,2007-12-21,Earthquake
3,2007-11-18,Earthquake
4,2007-12-21,Earthquake


In [15]:
# Distinct event categories present in the catalogue.
Earthquakes1.eventType.unique()

array(['Earthquake', 'Nuclear Explosion', 'Quarry Blast', 'Other Event',
       'Explosion', 'Mine Collapse', 'Rock Burst', 'Sonic Boom',
       'Mining Explosion', 'Landslide', 'Collapse', 'Volcanic Eruption'],
      dtype=object)

In [16]:
# Number of records per event category.
Earthquakes1.eventType.value_counts()

Earthquake           482420
Nuclear Explosion       707
Volcanic Eruption        63
Explosion                29
Mine Collapse            18
Quarry Blast             10
Rock Burst                7
Other Event               6
Mining Explosion          6
Sonic Boom                2
Landslide                 1
Collapse                  1
Name: eventType, dtype: int64

In [17]:
# Keep only the rows whose event category is exactly 'Earthquake'.
is_earthquake = Earthquakes1['eventType'] == 'Earthquake'
Earthquakes2 = Earthquakes1[is_earthquake]

In [18]:
# Sanity check: only the 'Earthquake' category should be counted now.
Earthquakes2.eventType.value_counts()

Earthquake    482420
Name: eventType, dtype: int64

In [19]:
# Preview the filtered table.
Earthquakes2.head()

Unnamed: 0,eventTime,eventType
0,2007-12-21,Earthquake
1,2007-11-18,Earthquake
2,2007-12-21,Earthquake
3,2007-11-18,Earthquake
4,2007-12-21,Earthquake


In [20]:
# Sanity check: 'Earthquake' should be the only remaining category.
Earthquakes2.eventType.unique()

array(['Earthquake'], dtype=object)

In [21]:
# Number of earthquake records per calendar date (most frequent first).
counts = Earthquakes2['eventTime'].value_counts()

# Build the two-column frame explicitly instead of to_frame() + rename().
# The name to_frame() gives the count column depends on the pandas version
# ('eventTime' before 2.0, 'count' from 2.0 on), so renaming 'eventTime'
# silently did nothing on newer pandas and left no 'Noccurences' column.
Earthquakes3 = pd.DataFrame({
    'Noccurences': counts.to_numpy(),
    'Date': counts.index.to_numpy(),
})
Earthquakes3.head()


Unnamed: 0,Noccurences,Date
0,565,2011-03-11
1,523,2011-03-12
2,370,2011-03-13
3,349,2010-02-27
4,307,2004-12-26


In [22]:
# Order the per-date counts from newest to oldest, then look at the oldest
# dates at the bottom of the table.
Earthquakes3 = Earthquakes3.sort_values(by='Date', ascending=False)
Earthquakes3.tail()

Unnamed: 0,Noccurences,Date
24945,1,1901-03-03
22173,1,1900-10-29
24944,1,1900-10-09
22172,1,1900-07-29
24931,1,1900-04-30


In [50]:
# Show the last rows of the per-date counts again.
Earthquakes3.tail()

Unnamed: 0,Noccurences,Date
24945,1,1901-03-03
22173,1,1900-10-29
24944,1,1900-10-09
22172,1,1900-07-29
24931,1,1900-04-30


In [45]:
# Size of the per-date table as (rows, columns); one row per distinct date.
Earthquakes3.shape

(25558, 2)

In [46]:
# Column dtypes before Date is converted to datetime.
Earthquakes3.dtypes

Noccurences     int64
Date           object
dtype: object

In [47]:
# Convert the Date column from date objects to datetime64 values.
Earthquakes3 = Earthquakes3.assign(
    Date=pd.to_datetime(Earthquakes3['Date'])
)

In [48]:
# Confirm that Date now has a datetime dtype.
Earthquakes3.dtypes

Noccurences             int64
Date           datetime64[ns]
dtype: object

In [51]:
# First rows of the per-date counts (the table was sorted by Date, descending).
Earthquakes3.head()

Unnamed: 0,Noccurences,Date
12078,17,2024-01-08
8915,23,2024-01-07
14971,12,2024-01-06
11289,18,2024-01-05
8356,24,2024-01-04


In [52]:
# Write the per-date counts to Earthquakes3.csv.
# NOTE(review): the row index is written as well (index=False is not passed),
# and after the sort it is no longer sequential -- confirm that readers of
# this file expect that extra unnamed first column.
Earthquakes3.to_csv('Earthquakes3.csv')

In [67]:
# Read the distances between Earth and the other bodies (planets, Moon, Sun)

In [71]:
# file = open("jupiter_Earth.txt", "r")
# content = file.read()
# print(content)
# file.close()

In [67]:
import re
import datetime

# Parse "jupiter_Earth.txt" into DataFrame_JupEarth: one (Date, Jupiter_Earth)
# row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("jupiter_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_JupEarth = pd.DataFrame(distance_rows, columns=['Date', 'Jupiter_Earth'])

In [68]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_JupEarth['Date'] = pd.to_datetime(DataFrame_JupEarth['Date'])

In [69]:
# Size of the parsed Jupiter table as (rows, columns).
DataFrame_JupEarth.shape

(23740, 2)

In [70]:
# Check the column dtypes after the Date conversion.
DataFrame_JupEarth.dtypes

Date             datetime64[ns]
Jupiter_Earth           float64
dtype: object

In [71]:
# Preview the first parsed Jupiter-Earth rows.
DataFrame_JupEarth.head()

Unnamed: 0,Date,Jupiter_Earth
0,1970-01-03,5.71434
1,1970-01-05,5.68371
2,1970-01-07,5.65275
3,1970-01-09,5.6215
4,1970-01-11,5.58999


In [72]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_JupEarth.to_csv('DataFrame_JupEarth.csv')

In [58]:
# Mars to Earth

In [73]:
import re
import datetime

# Parse "Mars_Earth.txt" into DataFrame_MarsEarth: one (Date, Mars_Earth) row
# per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Mars_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_MarsEarth = pd.DataFrame(distance_rows, columns=['Date', 'Mars_Earth'])

In [74]:
# Size of the parsed Mars table as (rows, columns).
DataFrame_MarsEarth.shape

(23740, 2)

In [75]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_MarsEarth = DataFrame_MarsEarth.assign(
    Date=pd.to_datetime(DataFrame_MarsEarth['Date'])
)

In [76]:
# Check the column dtypes after the Date conversion.
DataFrame_MarsEarth.dtypes

Date          datetime64[ns]
Mars_Earth           float64
dtype: object

In [77]:
# Preview the first parsed Mars-Earth rows.
DataFrame_MarsEarth.head()

Unnamed: 0,Date,Mars_Earth
0,1970-01-03,1.59266
1,1970-01-05,1.60675
2,1970-01-07,1.62087
3,1970-01-09,1.63501
4,1970-01-11,1.64916


In [78]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_MarsEarth.to_csv('DataFrame_MarsEarth.csv')

In [62]:
# Mercury to Earth

In [114]:
import re
import datetime

# Parse "Mercure_Earth.txt" into DataFrame_MercureEarth: one
# (Date, Mercure_Earth) row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Mercure_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_MercureEarth = pd.DataFrame(distance_rows, columns=['Date', 'Mercure_Earth'])

In [115]:
# Size of the parsed Mercury table as (rows, columns).
DataFrame_MercureEarth.shape

(23740, 2)

In [116]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_MercureEarth = DataFrame_MercureEarth.assign(
    Date=pd.to_datetime(DataFrame_MercureEarth['Date'])
)

In [117]:
# Check the column dtypes after the Date conversion.
DataFrame_MercureEarth.dtypes

Date             datetime64[ns]
Mercure_Earth           float64
dtype: object

In [118]:
# Preview the first parsed Mercury-Earth rows.
DataFrame_MercureEarth.head()

Unnamed: 0,Date,Mercure_Earth
0,1970-01-03,0.83783
1,1970-01-05,0.78918
2,1970-01-07,0.74573
3,1970-01-09,0.71002
4,1970-01-11,0.6843


In [119]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_MercureEarth.to_csv('DataFrame_MercureEarth.csv')

In [84]:
#Moon to Earth

In [85]:
import re
import datetime

# Parse "Moon_Earth.txt" into DataFrame_MoonEarth: one (Date, Moon_Earth) row
# per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Moon_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_MoonEarth = pd.DataFrame(distance_rows, columns=['Date', 'Moon_Earth'])

In [86]:
# Size of the parsed Moon table as (rows, columns).
DataFrame_MoonEarth.shape

(23740, 2)

In [87]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_MoonEarth = DataFrame_MoonEarth.assign(
    Date=pd.to_datetime(DataFrame_MoonEarth['Date'])
)

In [88]:
# Preview the first parsed Moon-Earth rows.
DataFrame_MoonEarth.head()

Unnamed: 0,Date,Moon_Earth
0,1970-01-03,0.00254
1,1970-01-05,0.00246
2,1970-01-07,0.0024
3,1970-01-09,0.00239
4,1970-01-11,0.00243


In [89]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_MoonEarth.to_csv('DataFrame_MoonEarth.csv')

In [90]:
# Neptune to Earth

In [91]:
import re
import datetime

# Parse "Neptune_earth.txt" into DataFrame_NeptuneEarth: one
# (Date, Neptune_Earth) row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Neptune_earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_NeptuneEarth = pd.DataFrame(distance_rows, columns=['Date', 'Neptune_Earth'])

In [92]:
# Size of the parsed Neptune table as (rows, columns).
DataFrame_NeptuneEarth.shape

(23740, 2)

In [93]:
# Convert the Date column from date objects to datetime64 values, then
# preview the first rows.
DataFrame_NeptuneEarth = DataFrame_NeptuneEarth.assign(
    Date=pd.to_datetime(DataFrame_NeptuneEarth['Date'])
)
DataFrame_NeptuneEarth.head()

Unnamed: 0,Date,Neptune_Earth
0,1970-01-03,31.04389
1,1970-01-05,31.02007
2,1970-01-07,30.9954
3,1970-01-09,30.96991
4,1970-01-11,30.94363


In [94]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_NeptuneEarth.to_csv('DataFrame_NeptuneEarth.csv')

In [95]:
# Saturn to Earth

In [96]:
import re
import datetime

# Parse "Saturne_Earth.txt" into DataFrame_SaturneEarth: one
# (Date, Saturne_Earth) row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Saturne_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_SaturneEarth = pd.DataFrame(distance_rows, columns=['Date', 'Saturne_Earth'])

In [97]:
# Size of the parsed Saturn table as (rows, columns).
DataFrame_SaturneEarth.shape

(23740, 2)

In [98]:
# Convert the Date column from date objects to datetime64 values, then
# preview the first rows.
DataFrame_SaturneEarth = DataFrame_SaturneEarth.assign(
    Date=pd.to_datetime(DataFrame_SaturneEarth['Date'])
)
DataFrame_SaturneEarth.head()

Unnamed: 0,Date,Saturne_Earth
0,1970-01-03,8.83384
1,1970-01-05,8.86526
2,1970-01-07,8.89705
3,1970-01-09,8.92917
4,1970-01-11,8.96156


In [99]:
# Write the table to CSV; the row index is written as an unnamed first column
# because index=False is not passed.
DataFrame_SaturneEarth.to_csv('DataFrame_SaturneEarth.csv')

In [100]:
# Sun to Earth

In [101]:
import re
import datetime

# Parse "Sun_Earth.txt" into DataFrame_SunEarth: one (Date, Sun_Earth) row per
# non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Sun_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_SunEarth = pd.DataFrame(distance_rows, columns=['Date', 'Sun_Earth'])

In [102]:
# Size of the parsed Sun table as (rows, columns).
DataFrame_SunEarth.shape

(23740, 2)

In [103]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_SunEarth['Date'] = pd.to_datetime(DataFrame_SunEarth['Date'])

# Write the table to CSV (the row index is included as an unnamed column).
DataFrame_SunEarth.to_csv('DataFrame_SunEarth.csv')

# The preview goes last: only a cell's final expression is displayed, so a
# head() call placed before to_csv() was never shown.
DataFrame_SunEarth.head()

In [104]:
# Uranus to Earth

In [105]:
import re
import datetime

# Parse "Uranius_Earth.txt" into DataFrame_UranEarth: one (Date, Uran_Earth)
# row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("Uranius_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_UranEarth = pd.DataFrame(distance_rows, columns=['Date', 'Uran_Earth'])

In [107]:
# Size of the parsed Uranus table as (rows, columns).
DataFrame_UranEarth.shape

(23740, 2)

In [108]:
# Convert the Date column from date objects to datetime64 values.
DataFrame_UranEarth['Date'] = pd.to_datetime(DataFrame_UranEarth['Date'])

# Write the table to CSV (the row index is included as an unnamed column).
DataFrame_UranEarth.to_csv('DataFrame_UranEarth.csv')

# The preview goes last: only a cell's final expression is displayed, so a
# head() call placed before to_csv() was never shown.
DataFrame_UranEarth.head()

In [109]:
# Preview the first parsed Uranus-Earth rows.
DataFrame_UranEarth.head()

Unnamed: 0,Date,Uran_Earth
0,1970-01-03,18.23127
1,1970-01-05,18.19692
2,1970-01-07,18.16268
3,1970-01-09,18.1286
4,1970-01-11,18.09472


In [110]:
import re
import datetime

# Parse "venus_Earth.txt" into DataFrame_venusEarth: one (Date, venus_Earth)
# row per non-blank line of the file.
# NOTE: `import datetime` rebinds the notebook-level name `datetime` (imported
# as the class at the top of the notebook) to the module.
distance_rows = []

# `with` closes the file even if a line fails to parse.
with open("venus_Earth.txt", "r") as distance_file:
    for line in distance_file:
        # Skip blank lines instead of failing with IndexError below.
        if not line.strip():
            continue
        # All YYYY/MM/DD tokens on the line; the first one is the date.
        date_tokens = re.findall(r"\d{4}/\d{2}/\d{2}", line)
        # Shortest span running from the first comma up to the first "],".
        bracket_spans = re.findall(r"\,.*?\],", line)
        # Within that span (leading comma removed): from the next comma up to
        # the closing "]".
        value_spans = re.findall(r"\,.*?\]", bracket_spans[0][1:])

        day = datetime.datetime.strptime(date_tokens[0], '%Y/%m/%d').date()
        # Strip the leading "," and the trailing "]" to leave only the number.
        distance = float(value_spans[0][1:-1])
        distance_rows.append((day, distance))

# Build the frame once from the collected rows; appending with .loc[i] inside
# the loop gets slower with every row added.
DataFrame_venusEarth = pd.DataFrame(distance_rows, columns=['Date', 'venus_Earth'])

In [111]:
# Size of the parsed Venus table as (rows, columns).
DataFrame_venusEarth.shape

(23740, 2)

In [112]:
# Convert the Date column from date objects to datetime64 values, then write
# the table to CSV (row index included).
DataFrame_venusEarth = DataFrame_venusEarth.assign(
    Date=pd.to_datetime(DataFrame_venusEarth['Date'])
)
DataFrame_venusEarth.to_csv('DataFrame_venusEarth.csv')

In [113]:
# Preview the first parsed Venus-Earth rows.
DataFrame_venusEarth.head()

Unnamed: 0,Date,venus_Earth
0,1970-01-03,1.70068
1,1970-01-05,1.70254
2,1970-01-07,1.70423
3,1970-01-09,1.70576
4,1970-01-11,1.70713
