Extracting data from https://coincodex.com/crypto/ripple/, preprocessing it, and saving it to a MongoDB database

In [1]:
# import libraries
import os
from getpass import getpass
from pathlib import Path

import certifi
import numpy as np
import pandas as pd
from pymongo import MongoClient

In [2]:
# Locate the raw Ripple (XRP) export under the current user's home directory.
# Path.home() replaces the hard-coded '/Users/<username>' prefix, so the username
# no longer has to be edited by hand; adjust the remaining folders if the project
# lives somewhere other than ~/Desktop/cryptocurrency_analysis.
file_path = (
    Path.home() / 'Desktop' / 'cryptocurrency_analysis' / 'data'
    / 'Ripple_(XRP)' / 'ripple_2019-04-16_2024-04-14.csv'
)

# Load the dataset from the specified file path
df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to ensure it has loaded correctly
df.head()

Unnamed: 0,Start,End,Open,High,Low,Close,Volume,Market Cap
0,2024-04-13,2024-04-14,0.545796,0.548373,0.430361,0.48005,19643260000.0,29044760000.0
1,2024-04-12,2024-04-13,0.609438,0.614963,0.520659,0.548342,17870760000.0,32475320000.0
2,2024-04-11,2024-04-12,0.616488,0.621427,0.603511,0.609407,18397740000.0,33791830000.0
3,2024-04-10,2024-04-11,0.613731,0.62112,0.596855,0.617208,18959960000.0,33639840000.0
4,2024-04-09,2024-04-10,0.616682,0.639545,0.601881,0.614056,14179040000.0,34045370000.0


In [3]:
# Check for missing values: count the NaN/None entries in each column
# (all zeros means no value is missing).
df.isna().sum()

Start         0
End           0
Open          0
High          0
Low           0
Close         0
Volume        0
Market Cap    0
dtype: int64

In [4]:
# Tag every row with the coin name, use the period end ('End') as the row's
# 'Date', rename 'Market Cap' to 'Marketcap', and keep only the columns below
# in this order (this drops 'Start').
desired_columns = ['Name', 'Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']

# assign/rename return new frames instead of mutating in place, so the cell can
# be re-run safely (df.insert raises once a 'Name' column already exists).
# The trailing .copy() makes df an independent frame, so assigning to its
# columns later does not trigger SettingWithCopyWarning.
df = (
    df.assign(Name='Ripple')
    .rename(columns={'End': 'Date', 'Market Cap': 'Marketcap'})
    .loc[:, desired_columns]
    .copy()
)

# Save the reshaped DataFrame to a new CSV (without the row index)
df.to_csv('modified_data.csv', index=False)

In [5]:
# Preview the first rows of the reshaped DataFrame
df.head()

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Ripple,2024-04-14,0.548373,0.430361,0.545796,0.48005,19643260000.0,29044760000.0
1,Ripple,2024-04-13,0.614963,0.520659,0.609438,0.548342,17870760000.0,32475320000.0
2,Ripple,2024-04-12,0.621427,0.603511,0.616488,0.609407,18397740000.0,33791830000.0
3,Ripple,2024-04-11,0.62112,0.596855,0.613731,0.617208,18959960000.0,33639840000.0
4,Ripple,2024-04-10,0.639545,0.601881,0.616682,0.614056,14179040000.0,34045370000.0


In [6]:
# Convert the 'Date' strings to datetime values. assign() builds a new frame
# instead of writing into df in place, which avoids SettingWithCopyWarning when
# df is a column subset of another frame.
df = df.assign(Date=pd.to_datetime(df['Date']))

In [7]:
# The analysis covers the last five years; list the distinct dates to inspect
# the range present in the data (no filtering is done here, the dates are only printed)
dates = df['Date'].unique()
print(dates)

<DatetimeArray>
['2024-04-14 00:00:00', '2024-04-13 00:00:00', '2024-04-12 00:00:00',
 '2024-04-11 00:00:00', '2024-04-10 00:00:00', '2024-04-09 00:00:00',
 '2024-04-08 00:00:00', '2024-04-07 00:00:00', '2024-04-06 00:00:00',
 '2024-04-05 00:00:00',
 ...
 '2019-04-26 00:00:00', '2019-04-25 00:00:00', '2019-04-24 00:00:00',
 '2019-04-23 00:00:00', '2019-04-22 00:00:00', '2019-04-21 00:00:00',
 '2019-04-20 00:00:00', '2019-04-19 00:00:00', '2019-04-18 00:00:00',
 '2019-04-17 00:00:00']
Length: 1825, dtype: datetime64[ns]


In [8]:
# Working frame for the Ripple data. No date filter is applied here, so it holds
# every row of df. .copy() makes ripple_df an independent DataFrame rather than
# a second name for df, so later changes to one do not silently alter the other.
ripple_df = df.copy()
ripple_df

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Ripple,2024-04-14,0.548373,0.430361,0.545796,0.480050,1.964326e+10,2.904476e+10
1,Ripple,2024-04-13,0.614963,0.520659,0.609438,0.548342,1.787076e+10,3.247532e+10
2,Ripple,2024-04-12,0.621427,0.603511,0.616488,0.609407,1.839774e+10,3.379183e+10
3,Ripple,2024-04-11,0.621120,0.596855,0.613731,0.617208,1.895996e+10,3.363984e+10
4,Ripple,2024-04-10,0.639545,0.601881,0.616682,0.614056,1.417904e+10,3.404537e+10
...,...,...,...,...,...,...,...,...
1820,Ripple,2019-04-21,0.335609,0.327818,0.334186,0.331460,7.643931e+08,1.398374e+10
1821,Ripple,2019-04-20,0.338381,0.331285,0.338381,0.334124,9.343012e+08,1.402296e+10
1822,Ripple,2019-04-19,0.347958,0.336499,0.338288,0.338405,1.132520e+09,1.427482e+10
1823,Ripple,2019-04-18,0.342660,0.324983,0.330490,0.337951,8.383319e+08,1.391597e+10


In [9]:
# Build the output location under the current user's home directory instead of
# a hard-coded '/Users/<username>' prefix; adjust the remaining folders if the
# project lives somewhere other than ~/Desktop/cryptocurrency_analysis.
file_path = (
    Path.home() / 'Desktop' / 'cryptocurrency_analysis' / 'data'
    / 'Ripple_(XRP)' / 'ripple(fiveyears).csv'
)

# Save the DataFrame to CSV at that path; index=False keeps the row index out of the file
ripple_df.to_csv(file_path, index=False)

# Storing the data in MongoDB (database name: Finale)

In [11]:
# Connect to the MongoDB server.
# The connection URI contains the username and password, so it is read from the
# MONGODB_URI environment variable (with an interactive prompt as a fallback)
# instead of being stored in the notebook.
# NOTE(review): the credentials that were previously hard-coded here have been
# exposed in the notebook history and should be rotated.
mongodb_uri = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")

# certifi.where() provides the CA bundle used to verify the TLS connection
client = MongoClient(mongodb_uri, tlsCAFile=certifi.where())

In [12]:
# List the names of the databases on the connected MongoDB server
client.list_database_names()

['Bitcoin_db',
 'Cardano_DB',
 'Ethereum_db',
 'Finale',
 'Tether_DB',
 'XRP_DB',
 'admin',
 'local']

In [13]:
# Select the 'Finale' database and the collection that will hold the Ripple rows.
# The collection used to be 'Bitcoin', apparently carried over from a Bitcoin
# notebook; writing there would mix Ripple rows into the Bitcoin data.
# NOTE(review): confirm 'Ripple' is the collection name the team expects.
db = client['Finale']
collection = db['Ripple']

In [14]:
# Convert the Ripple DataFrame into a list of dicts (one per row) for MongoDB.
# The frame built in this notebook is ripple_df; no `bitcoin_df` is defined here.
# reset_index() still adds the row number to every record as an 'index' field,
# but returns a new frame instead of modifying ripple_df, so the cell can be re-run.
# The name bitcoin_df_dict is kept because the insert cell refers to it.
bitcoin_df_dict = ripple_df.reset_index().to_dict("records")

In [15]:
# Insert the records into the MongoDB collection (one document per DataFrame row)
# NOTE(review): running the notebook again inserts the rows again — confirm
# whether existing documents should be removed first.
collection.insert_many(bitcoin_df_dict)

<pymongo.results.InsertManyResult at 0x15bb6b587c0>