In [12]:
import requests
import json
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime

# Replace 'YOUR_API_KEY' with your actual CoinMarketCap API key
api_key = '42485936-1986-4342-9e0a-e854c8b0fe47'
url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'

parameters = {
    'start': '1',
    'limit': '100',  # Number of cryptocurrencies to retrieve
    'convert': 'USD'  # Convert prices to USD
}

headers = {
    'Accepts': 'application/json',
    'X-CMC_PRO_API_KEY': api_key,
}

response = requests.get(url, headers=headers, params=parameters)
data = response.json()

# Pretty-print the JSON response
print(json.dumps(data, indent=4))


{
    "status": {
        "timestamp": "2024-12-16T02:47:16.891Z",
        "error_code": 0,
        "error_message": null,
        "elapsed": 14,
        "credit_count": 1,
        "notice": null,
        "total_count": 10311
    },
    "data": [
        {
            "id": 1,
            "name": "Bitcoin",
            "symbol": "BTC",
            "slug": "bitcoin",
            "num_market_pairs": 11841,
            "date_added": "2010-07-13T00:00:00.000Z",
            "tags": [
                "mineable",
                "pow",
                "sha-256",
                "store-of-value",
                "state-channel",
                "coinbase-ventures-portfolio",
                "three-arrows-capital-portfolio",
                "polychain-capital-portfolio",
                "binance-labs-portfolio",
                "blockchain-capital-portfolio",
                "boostvc-portfolio",
                "cms-holdings-portfolio",
                "dcg-portfolio",
                "dragonfl

In [13]:
organized_data = []
for item in data['data']:
    btc_info = {
        "id": item['id'],
        "name": item['name'],
        "symbol": item['symbol'],
        "cmc_rank": item['cmc_rank'],
        "price": item['quote']['USD']['price'],
        "volume_24h": item['quote']['USD']['volume_24h'],
        "market_cap": item['quote']['USD']['market_cap'],
        "market_cap_dominance": item['quote']['USD']['market_cap_dominance'],
        "circulating_supply": item['circulating_supply'],
        "max_supply": item['max_supply'],
        "percent_change_1h": item['quote']['USD']['percent_change_1h'],
        "percent_change_24h": item['quote']['USD']['percent_change_24h'],
        "percent_change_7d": item['quote']['USD']['percent_change_7d'],
        "last_updated": item['quote']['USD']['last_updated']
    }
    organized_data.append(btc_info)

# Convert to a DataFrame
df = pd.DataFrame(organized_data)

# Display the DataFrame
df.head()

Unnamed: 0,id,name,symbol,cmc_rank,price,volume_24h,market_cap,market_cap_dominance,circulating_supply,max_supply,percent_change_1h,percent_change_24h,percent_change_7d,last_updated
0,1,Bitcoin,BTC,1,104627.183051,60728420000.0,2071272000000.0,55.9182,19796690.0,21000000.0,-0.533732,2.810853,5.377919,2024-12-16T02:44:00.000Z
1,1027,Ethereum,ETH,2,3971.109988,30184840000.0,478321100000.0,12.9162,120450200.0,,-0.600361,2.228664,0.70773,2024-12-16T02:45:00.000Z
2,825,Tether USDt,USDT,3,0.999459,126808200000.0,140177800000.0,3.7844,140253700000.0,,0.003377,-0.022192,-0.129193,2024-12-16T02:44:00.000Z
3,52,XRP,XRP,4,2.414493,6615564000.0,138079700000.0,3.7289,57187870000.0,100000000000.0,-0.954826,-0.295733,-2.44946,2024-12-16T02:45:00.000Z
4,5426,Solana,SOL,5,223.013613,3426024000.0,106855600000.0,2.8856,479143900.0,,-1.212917,0.844453,-3.616969,2024-12-16T02:45:00.000Z


In [14]:
# Convert 'last_update' to a proper datetime object
df["last_updated"] = pd.to_datetime(df["last_updated"])

# Add 'pulled_at' field with the current timestamp
df["pulled_at"] = datetime.now()


In [15]:
# Database connection setup
DB_USER = "postgres"
#DB_PASSWORD = ""
DB_HOST = "localhost"
DB_PORT = "5432"
DB_NAME = "signal"



In [16]:
# Replace with your actual database URL
#DATABASE_URL = f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
DATABASE_URL = f"postgresql+psycopg2://{DB_USER}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# Create database engine
engine = create_engine(DATABASE_URL)

In [17]:
# Define the table name
table_name = "crypto_price_history"

In [18]:
# Function to upsert data
def upsert_crypto_data(df, table_name, engine):
    with engine.connect() as connection:
        # Query existing records (symbol + last_updated)
        query = f"""
        SELECT symbol, last_updated
        FROM {table_name};
        """
        existing_data = pd.read_sql(query, connection)
        
        # Merge to find new records
        new_data = df.merge(
            existing_data,
            on=["symbol", "last_updated"],
            how="left",
            indicator=True
        ).query('_merge == "left_only"').drop(columns=['_merge'])
        
        # Insert new records
        if not new_data.empty:
            new_data.to_sql(table_name, engine, if_exists="append", index=False)
            print(f"Inserted {len(new_data)} new records into {table_name}.")
        else:
            print("No new records to insert.")



In [19]:
df.describe()

Unnamed: 0,id,cmc_rank,price,volume_24h,market_cap,market_cap_dominance,circulating_supply,max_supply,percent_change_1h,percent_change_24h,percent_change_7d,pulled_at
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,39.0,100.0,100.0,100.0,100
mean,10750.11,50.5,1141.431342,2782319000.0,35927500000.0,0.970026,20645610000000.0,13176580000000.0,-1.35178,2.496177,-4.089482,2024-12-15 20:47:17.026378752
min,1.0,1.0,1e-06,706355.6,1204398000.0,0.0325,890885.6,1005577.0,-2.568851,-2.242258,-21.269521,2024-12-15 20:47:17.026379
25%,3135.5,25.75,0.623075,104406700.0,1675630000.0,0.045225,442872600.0,205350000.0,-1.759644,0.499999,-10.911598,2024-12-15 20:47:17.026379008
50%,6537.0,50.5,2.049288,271936700.0,3201977000.0,0.0865,1369880000.0,2000000000.0,-1.498384,1.864904,-6.750866,2024-12-15 20:47:17.026379008
75%,20533.75,75.25,12.926276,567557300.0,6607382000.0,0.1784,6500741000.0,10000000000.0,-1.007319,3.367078,-0.53625,2024-12-15 20:47:17.026379008
max,32880.0,100.0,104627.183051,126808200000.0,2071272000000.0,55.9182,968246400000000.0,420690000000000.0,0.383044,16.341314,66.211199,2024-12-15 20:47:17.026379
std,10066.048948,29.011492,10462.915696,14252400000.0,212281000000.0,5.731026,119970800000000.0,68595560000000.0,0.624468,3.241105,12.197047,


In [20]:
def validate_and_truncate(df):
    # Truncate string fields to match database limits
    df["symbol"] = df["symbol"].str[:10]
    df["name"] = df["name"].str[:50]

    # Clip numeric fields to match database constraints
    df["price"] = df["price"].clip(upper=10**12 - 1)
    df["volume_24h"] = df["volume_24h"].clip(upper=10**12 - 1)
    df["market_cap"] = df["market_cap"].clip(upper=10**12 - 1)
    df["market_cap_dominance"] = df["market_cap_dominance"].clip(upper=999.99)
    df["circulating_supply"] = df["circulating_supply"].clip(upper=10**12 - 1)
    df["max_supply"] = df["max_supply"].clip(upper=10**12 - 1)
    df["percent_change_1h"] = df["percent_change_1h"].clip(-9999.9999, 9999.9999)
    df["percent_change_24h"] = df["percent_change_24h"].clip(-9999.9999, 9999.9999)
    df["percent_change_7d"] = df["percent_change_7d"].clip(-9999.9999, 9999.9999)

    # Ensure no null values in required fields (if applicable)
    df = df.dropna(subset=["symbol", "last_updated"])

    return df


In [21]:
# Validate and truncate DataFrame
df_cleaned = validate_and_truncate(df)


In [22]:
# Call the function
upsert_crypto_data(df_cleaned, table_name, engine)


ValueError: You are trying to merge on datetime64[ns, UTC] and datetime64[ns] columns. If you wish to proceed you should use pd.concat