# Imports

In [36]:
import os
from ast import literal_eval
import pandas as pd
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Table, Column, ForeignKey
from sqlalchemy import types
from sqlalchemy.orm import declarative_base, relationship
from dotenv import load_dotenv

load_dotenv()

True

# Merging Coins Files

In [11]:
coins_df = pd.read_csv("../datasets/coins.csv")
coins_df = coins_df.rename(
    columns={
        "Rank": "rank",
        "Name": "name",
        "Symbol": "symbol",
        "Main Link": "main_link",
        "Historical Link": "historical_link",
        "Price": "current_price",
        "Market Cap": "current_market_cap",
        "Volume(24 Hour)": "volume_24_hour",
        "Circulating Supply": "circulating_supply",
    }
)

coins_df["id"] = coins_df["rank"]
coins_cols = coins_df.columns.tolist()
coins_cols = coins_cols[-1:] + coins_cols[:-1]
coins_df = coins_df[coins_cols]

github_col = coins_df["GitHub Link"]
tags_col = coins_df["Tags"]
coins_df = coins_df.drop(columns=["GitHub Link", "Tags"])

coins_df.to_csv("datasets/coins", index=False)

In [10]:
coins_df.head()

Unnamed: 0,id,rank,name,symbol,current_price,current_market_cap,volume_24_hour,circulating_supply,main_link,historical_link
0,1,1,Bitcoin,BTC,26047.67,507090200000.0,12406050000.0,19467775,https://coinmarketcap.com/currencies/bitcoin/,https://coinmarketcap.com/currencies/bitcoin/h...
1,2,2,Ethereum,ETH,1652.94,198710000000.0,5396998000.0,120216444,https://coinmarketcap.com/currencies/ethereum/,https://coinmarketcap.com/currencies/ethereum/...
2,3,3,Tether USDt,USDT,0.9995,82807250000.0,19999150000.0,82849133566,https://coinmarketcap.com/currencies/tether/,https://coinmarketcap.com/currencies/tether/hi...
3,4,4,BNB,BNB,218.33,33590480000.0,423891100.0,153851122,https://coinmarketcap.com/currencies/bnb/,https://coinmarketcap.com/currencies/bnb/histo...
4,5,5,XRP,XRP,0.5262,27845880000.0,1067427000.0,52914193551,https://coinmarketcap.com/currencies/xrp/,https://coinmarketcap.com/currencies/xrp/histo...


# Creating Tags

In [22]:
t = pd.concat([coins_df[["rank", "name"]], tags_col], axis=1)

rows = []
for rank, _, tag_list in t.drop(t[t["Tags"] == "No Tags"].index).values:
    for tag in literal_eval(tag_list):
        rows.append({"coin_id": rank, "tag": tag})

tags_df = pd.DataFrame.from_dict(rows)
tags_df.to_csv("datasets/tags", index=False)

# Creating Github Links

In [21]:
github_df = pd.concat([coins_df["rank"], github_col], axis=1)

github_df = github_df.rename(
    columns={"rank": "coin_id", "GitHub Link": "link"}
)
github_df.to_csv("datasets/githubs", index=False)

# Merging All History Files

In [25]:
histories_file_list = os.listdir("../datasets/history/")
history_dfs = []

for history in histories_file_list:
    dir = "../datasets/history/" + history
    df = pd.read_csv(dir, sep=";")
    df["name"] = history.split("_")[0]
    history_dfs.append(df)

histories_df = pd.concat(history_dfs)
histories_df = histories_df.reset_index().drop(columns=["index"])

histories_df = coins_df.merge(histories_df, on="name")[
    [*histories_df.columns, "id"]
]
histories_df = histories_df.rename(
    columns={
        "timeOpen": "time_open",
        "timeClose": "time_close",
        "timeHigh": "time_high",
        "timeLow": "time_low",
        "marketCap": "market_cap",
        "id": "coin_id",
    }
)

histories_cols = histories_df.columns.tolist()
histories_cols = histories_cols[-1:] + histories_cols[:-1]
histories_df = histories_df[histories_cols]
histories_df = histories_df.drop(columns=["name"])
histories_df.to_csv("datasets/histories", index=False)

# Final Data Overview

In [48]:
coins = pd.read_csv("./datasets/coins")
tags = pd.read_csv("./datasets/tags")
histories = pd.read_csv("./datasets/histories", parse_dates=["time_open", "time_close", "time_high", "time_low", "timestamp"])
githubs = pd.read_csv("./datasets/githubs")

In [49]:
coins.columns

Index(['id', 'rank', 'name', 'symbol', 'current_price', 'current_market_cap',
       'volume_24_hour', 'circulating_supply', 'main_link', 'historical_link'],
      dtype='object')

In [50]:
tags.columns

Index(['coin_id', 'tag'], dtype='object')

In [51]:
githubs.columns

Index(['coin_id', 'link'], dtype='object')

In [52]:
histories.columns

Index(['coin_id', 'time_open', 'time_close', 'time_high', 'time_low', 'open',
       'high', 'low', 'close', 'volume', 'market_cap', 'timestamp'],
      dtype='object')

In [53]:
coins.shape, tags.shape, histories.shape

((200, 10), (557, 2), (63836, 12))

# Database Creation

In [54]:
USERNAME = os.getenv("MYSQL_USERNAME")
PASSWORD = os.getenv("MYSQL_PASSWORD")
SERVER = os.getenv("MYSQL_SERVER")
DATABASE = os.getenv("MYSQL_DATABASE")

Base = declarative_base()

meta = MetaData()
engine = create_engine(
    f"mysql+pymysql://{USERNAME}:{PASSWORD}@{SERVER}:3306/{DATABASE}", echo=True
)
connection = engine.connect()

2023-09-03 19:38:21,901 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2023-09-03 19:38:21,909 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:21,921 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2023-09-03 19:38:21,927 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:21,944 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2023-09-03 19:38:21,947 INFO sqlalchemy.engine.Engine [raw sql] {}


### Coin Table

In [55]:
coins.head(1)

Unnamed: 0,id,rank,name,symbol,current_price,current_market_cap,volume_24_hour,circulating_supply,main_link,historical_link
0,1,1,Bitcoin,BTC,26047.67,507090200000.0,12406050000.0,19467775,https://coinmarketcap.com/currencies/bitcoin/,https://coinmarketcap.com/currencies/bitcoin/h...


In [56]:
class Coin(Base):
    __tablename__ = "coins"

    id = Column(types.Integer, primary_key=True, autoincrement=True)
    rank = Column(types.BigInteger)
    name = Column(types.String(255), primary_key=True)
    symbol = Column(types.String(255))
    main_link = Column(types.Text)
    historical_link = Column(types.Text)
    current_price = Column(types.Float(32))
    current_market_cap = Column(types.Float(32))
    volume_24_hour = Column(types.Float(32))
    circulating_supply = Column(types.BigInteger)

    tags = relationship("Tag", back_populates="coin")
    history = relationship("History", back_populates="coin")

    def __repr__(self):
        return f"Coin Name: {self.name} with Rank: {self.rank}"

### Tags Table

In [57]:
tags.head(1)

Unnamed: 0,coin_id,tag
0,1,Mineable


In [58]:
class Tag(Base):
    __tablename__ = "tags"

    id = Column(types.BigInteger, primary_key=True, autoincrement=True)
    coin_id = Column(types.Integer, ForeignKey("coins.id"), nullable=False)
    tag = Column(types.String(255))

    coin = relationship("Coin", backref="tags")

    def __repr__(self):
        return f"Tag Name: {self.tag}"

### Githubs Table

In [59]:
githubs.head(1)

Unnamed: 0,coin_id,link
0,1,https://github.com/bitcoin/bitcoin


In [60]:
class Github(Base):
    __tablename__ = "githubs"

    id = Column(types.Integer, primary_key=True, autoincrement=True)
    coin_id = Column(types.Integer, ForeignKey("coins.id"), nullable=False)
    link = Column(types.Text)

    coin = relationship("Coin", backref="tags")

    def __repr__(self):
        return f"Github Link: {self.link}"

### History Table

In [61]:
histories.head(1)

Unnamed: 0,coin_id,time_open,time_close,time_high,time_low,open,high,low,close,volume,market_cap,timestamp
0,1,2023-08-31 00:00:00+00:00,2023-08-31 23:59:59.999000+00:00,2023-08-31 11:43:00+00:00,2023-08-31 21:09:00+00:00,27301.929317,27456.079001,25752.929947,25931.472893,20181000000.0,504957600000.0,2023-08-31 23:59:59.999000+00:00


In [62]:
class History(Base):
    __tablename__ = "history"

    id = Column(types.BigInteger, primary_key=True, autoincrement=True)
    coin_id = Column(types.Integer, ForeignKey("coins.id"), nullable=False)
    time_open = Column(types.DateTime)
    time_close = Column(types.DateTime)
    time_high = Column(types.DateTime)
    time_low = Column(types.DateTime)
    open = Column(types.Float(32))
    high = Column(types.Float(32))
    low = Column(types.Float(32))
    close = Column(types.Float(32))
    volume = Column(types.Float(32))
    market_cap = Column(types.Float(32))
    timestamp = Column(types.DateTime)

    history = relationship("Coin", back_populates="history")

    def __repr__(self):
        return f"Coin Name: {self.coin_id}"

### Tables Creation

In [63]:
Base.metadata.create_all(engine)

2023-09-03 19:38:22,362 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-09-03 19:38:22,364 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`coins`
2023-09-03 19:38:22,365 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:22,368 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`tags`
2023-09-03 19:38:22,369 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:22,371 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`githubs`
2023-09-03 19:38:22,372 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:22,375 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`history`
2023-09-03 19:38:22,376 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:22,379 INFO sqlalchemy.engine.Engine 
CREATE TABLE coins (
	id INTEGER NOT NULL AUTO_INCREMENT, 
	`rank` BIGINT, 
	name VARCHAR(255) NOT NULL, 
	symbol VARCHAR(255), 
	main_link TEXT, 
	historical_link TEXT, 
	current_price FLOAT(32), 
	current_market_cap FLOAT(32), 
	volume_24_hour F

# Data Insertion

In [64]:
coins.to_sql("coins", engine, if_exists="append", index=False)

2023-09-03 19:38:28,424 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-09-03 19:38:28,429 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`coins`
2023-09-03 19:38:28,430 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:28,440 INFO sqlalchemy.engine.Engine INSERT INTO coins (id, `rank`, name, symbol, current_price, current_market_cap, volume_24_hour, circulating_supply, main_link, historical_link) VALUES (%(id)s, %(rank)s, %(name)s, %(symbol)s, %(current_price)s, %(current_market_cap)s, %(volume_24_hour)s, %(circulating_supply)s, %(main_link)s, %(historical_link)s)
2023-09-03 19:38:28,441 INFO sqlalchemy.engine.Engine [generated in 0.00245s] [{'id': 1, 'rank': 1, 'name': 'Bitcoin', 'symbol': 'BTC', 'current_price': 26047.67, 'current_market_cap': 507090156466.71, 'volume_24_hour': 12406045117.51, 'circulating_supply': 19467775, 'main_link': 'https://coinmarketcap.com/currencies/bitcoin/', 'historical_link': 'https://coinmarketcap.com/currencies/bitcoin/historica

200

In [65]:
githubs.to_sql("githubs", engine, if_exists="append", index=False)

2023-09-03 19:38:30,468 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-09-03 19:38:30,470 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`githubs`
2023-09-03 19:38:30,471 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:30,480 INFO sqlalchemy.engine.Engine INSERT INTO githubs (coin_id, link) VALUES (%(coin_id)s, %(link)s)
2023-09-03 19:38:30,482 INFO sqlalchemy.engine.Engine [generated in 0.00292s] [{'coin_id': 1, 'link': 'https://github.com/bitcoin/bitcoin'}, {'coin_id': 2, 'link': 'https://github.com/ethereum/go-ethereum'}, {'coin_id': 3, 'link': 'No GitHub Link'}, {'coin_id': 4, 'link': 'https://github.com/bnb-chain'}, {'coin_id': 5, 'link': 'https://github.com/ripple/rippled'}, {'coin_id': 6, 'link': 'https://github.com/centrehq/centre-tokens'}, {'coin_id': 7, 'link': 'https://cardanoupdates.com/'}, {'coin_id': 8, 'link': 'https://github.com/dogecoin/dogecoin'}  ... displaying 10 of 200 total bound parameter sets ...  {'coin_id': 199, 'link': 'https://gith

200

In [66]:
tags.to_sql("tags", engine, if_exists="append", index=False)

2023-09-03 19:38:33,033 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-09-03 19:38:33,036 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`tags`
2023-09-03 19:38:33,038 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:33,047 INFO sqlalchemy.engine.Engine INSERT INTO tags (coin_id, tag) VALUES (%(coin_id)s, %(tag)s)
2023-09-03 19:38:33,049 INFO sqlalchemy.engine.Engine [generated in 0.00253s] [{'coin_id': 1, 'tag': 'Mineable'}, {'coin_id': 1, 'tag': 'PoW'}, {'coin_id': 1, 'tag': 'SHA-256'}, {'coin_id': 2, 'tag': 'PoS'}, {'coin_id': 2, 'tag': 'Smart Contracts'}, {'coin_id': 2, 'tag': 'Ethereum Ecosystem'}, {'coin_id': 3, 'tag': 'Payments'}, {'coin_id': 3, 'tag': 'Stablecoin'}  ... displaying 10 of 557 total bound parameter sets ...  {'coin_id': 200, 'tag': 'SEC Security Token'}, {'coin_id': 200, 'tag': 'Alleged SEC Securities'}]
2023-09-03 19:38:33,076 INFO sqlalchemy.engine.Engine COMMIT


557

In [67]:
histories.to_sql("history", engine, if_exists="append", index=False)

2023-09-03 19:38:35,053 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-09-03 19:38:35,058 INFO sqlalchemy.engine.Engine DESCRIBE `coinmarketplace`.`history`
2023-09-03 19:38:35,059 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-09-03 19:38:36,962 INFO sqlalchemy.engine.Engine INSERT INTO history (coin_id, time_open, time_close, time_high, time_low, open, high, low, close, volume, market_cap, timestamp) VALUES (%(coin_id)s, %(time_open)s, %(time_close)s, %(time_high)s, %(time_low)s, %(open)s, %(high)s, %(low)s, %(close)s, %(volume)s, %(market_cap)s, %(timestamp)s)
2023-09-03 19:38:36,966 INFO sqlalchemy.engine.Engine [generated in 1.48554s] [{'coin_id': 1, 'time_open': datetime.datetime(2023, 8, 31, 0, 0, tzinfo=datetime.timezone.utc), 'time_close': datetime.datetime(2023, 8, 31, 23, 59, 59, 999000, tzinfo=datetime.timezone.utc), 'time_high': datetime.datetime(2023, 8, 31, 11, 43, tzinfo=datetime.timezone.utc), 'time_low': datetime.datetime(2023, 8, 31, 21, 9, tzinfo=datetime.time

63836