In [19]:
# Import required libraries
import requests
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time
from dotenv import load_dotenv


In [20]:
# Crypto configuration
CRYPTOCURRENCIES = [
    {'id': 'bitcoin', 'symbol': 'btc', 'name': 'Bitcoin'},
    {'id': 'ethereum', 'symbol': 'eth', 'name': 'Ethereum'}, 
    {'id': 'dogecoin', 'symbol': 'doge', 'name': 'Dogecoin'}
]

# Create output directory for CSV files
OUTPUT_DIR = '../data'
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [21]:
load_dotenv()
COINGECKO_API_KEY = os.environ.get('COINGECKO_API_KEY')

# S3 Configuration
AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
S3_BUCKET = os.environ.get('S3_BUCKET')
S3_PREFIX = 'crypto/coingecko/'

In [22]:
CRYPTOCURRENCIES = [
    {'id': 'bitcoin', 'symbol': 'btc', 'name': 'Bitcoin'},
    {'id': 'ethereum', 'symbol': 'eth', 'name': 'Ethereum'}, 
    {'id': 'dogecoin', 'symbol': 'doge', 'name': 'Dogecoin'}
]

# Create output directory for CSV files
OUTPUT_DIR = '../data'
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [23]:
def fetch_coingecko_data(coin_id, vs_currency='usd', days='max'):
    """
    Fetch cryptocurrency market data from CoinGecko API with API key
    
    Args:
        coin_id (str): CoinGecko ID for the cryptocurrency (e.g., 'bitcoin')
        vs_currency (str): The target currency (default: 'usd')
        days (str/int): Data up to number of days ago (default: 'max' for all data)
    
    Returns:
        pandas.DataFrame: DataFrame containing the cryptocurrency data
    """
    # CoinGecko API endpoint
    url = f"https://api.coingecko.com/api/v3/coins/{coin_id}/market_chart"
    
    # Parameters for the API request
    params = {
        'vs_currency': vs_currency,
        'days': days,
        'interval': 'daily'  # Daily data
    }
    
    # Add API key to headers
    headers = {
        'x-cg-demo-api-key': COINGECKO_API_KEY
    }
    
    print(f"Fetching {coin_id} data from CoinGecko...")
    
    # Make the API request
    response = requests.get(url, params=params, headers=headers)
    
    # Check if request was successful
    if response.status_code == 200:
        data = response.json()
        
        # Extract price data
        prices = data.get('prices', [])
        
        # Create DataFrame
        df = pd.DataFrame(prices, columns=['timestamp', 'price'])
        
        # Convert timestamp (milliseconds) to datetime
        df['Date'] = pd.to_datetime(df['timestamp'], unit='ms').dt.date
        
        # Add volume and market cap if available
        if 'total_volumes' in data:
            volumes = pd.DataFrame(data['total_volumes'], columns=['timestamp', 'Volume'])
            df['Volume'] = volumes['Volume']
        
        if 'market_caps' in data:
            market_caps = pd.DataFrame(data['market_caps'], columns=['timestamp', 'Market_Cap'])
            df['Market_Cap'] = market_caps['Market_Cap']
        
        # Drop the timestamp column
        df = df.drop('timestamp', axis=1)
        
        # Add source and coin information
        df['Source'] = 'CoinGecko'
        df['Symbol'] = coin_id.upper()
        
        # Filter data from 2020-01-01 onwards
        df['Date'] = pd.to_datetime(df['Date'])
        df = df[df['Date'] >= '2020-01-01']
        
        print(f"Retrieved {len(df)} records")
        
        return df
    else:
        print(f"Error: API request failed with status code {response.status_code}")
        print(f"Response: {response.text}")
        return pd.DataFrame()



In [24]:
def upload_to_s3(local_file, s3_file):
    """
    Upload a file to S3 bucket
    
    Args:
        local_file (str): Path to local file
        s3_file (str): Path in S3 bucket
    
    Returns:
        bool: True if upload was successful, False otherwise
    """
    s3_client = boto3.client(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_KEY
    )
    
    try:
        s3_client.upload_file(local_file, S3_BUCKET, s3_file)
        print(f"Upload Successful: {s3_file}")
        return True
    except FileNotFoundError:
        print(f"The file {local_file} was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False
    except Exception as e:
        print(f"Error uploading to S3: {str(e)}")
        return False



In [25]:
# Fetch data for each cryptocurrency
all_data = {}
for crypto in CRYPTOCURRENCIES:
    coin_id = crypto['id']
    symbol = crypto['symbol']
    crypto_name = crypto['name']
    
    # Fetch data
    df = fetch_coingecko_data(coin_id)
    
    if not df.empty:
        all_data[symbol] = df
        
        # Display basic stats
        print(f"\n{crypto_name} ({symbol}) Stats:")
        print(f"Date Range: {df['Date'].min()} to {df['Date'].max()}")
        print(f"Current Price: ${df['price'].iloc[-1]:.2f}")
        print(f"All-time High: ${df['price'].max():.2f}")
        print(f"All-time Low: ${df['price'].min():.2f}")
        
       

Fetching bitcoin data from CoinGecko...
Error: API request failed with status code 401
Response: {"error":{"status":{"timestamp":"2025-02-27T22:55:28.326+00:00","error_code":10012,"error_message":"Your request exceeds the allowed time range. Public API users are limited to querying historical data within the past 365 days. Upgrade to a paid plan to enjoy full historical data access: https://www.coingecko.com/en/api/pricing. "}}}
Fetching ethereum data from CoinGecko...
Error: API request failed with status code 401
Response: {"error":{"status":{"timestamp":"2025-02-27T22:55:28.468+00:00","error_code":10012,"error_message":"Your request exceeds the allowed time range. Public API users are limited to querying historical data within the past 365 days. Upgrade to a paid plan to enjoy full historical data access: https://www.coingecko.com/en/api/pricing. "}}}
Fetching dogecoin data from CoinGecko...
Error: API request failed with status code 401
Response: {"error":{"status":{"timestamp":"20

In [None]:
 # Save to CSV
        timestamp = datetime.now().strftime('%Y%m%d')
        csv_filename = f"{OUTPUT_DIR}/{symbol.lower()}_{timestamp}.csv"
        df.to_csv(csv_filename, index=False)
        print(f"Saved to {csv_filename}")
        
        # Upload to S3
        s3_key = f"{S3_PREFIX}{symbol.lower()}_{timestamp}.csv"
        upload_to_s3(csv_filename, s3_key)
    
    # Sleep to avoid rate limiting
    time.sleep(2)  # Increased sleep time to handle rate limits

# Combine all data into a single CSV for convenience
combined_data = pd.concat(all_data.values())
combined_data = combined_data.sort_values(['Symbol', 'Date'])

# Save combined data
timestamp = datetime.now().strftime('%Y%m%d')
combined_filename = f"{OUTPUT_DIR}/combined_crypto_coingecko_{timestamp}.csv"
combined_data.to_csv(combined_filename, index=False)
print(f"\nSaved combined data to {combined_filename}")

# Upload combined data to S3
s3_key = f"{S3_PREFIX}combined_crypto_coingecko_{timestamp}.csv"
upload_to_s3(combined_filename, s3_key)

# Plot the price trends
plt.figure(figsize=(14, 8))

for symbol, df in all_data.items():
    plt.plot(df['Date'], df['price'], label=symbol)

plt.title('Cryptocurrency Prices Over Time (CoinGecko)', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price (USD)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.yscale('log')  # Log scale to better visualize different price ranges
plt.tight_layout()
plt.show()

# Plot Market Cap
plt.figure(figsize=(14, 8))

for symbol, df in all_data.items():
    if 'Market_Cap' in df.columns:
        plt.plot(df['Date'], df['Market_Cap'], label=f"{symbol} Market Cap")

plt.title('Cryptocurrency Market Cap Over Time', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Market Cap (USD)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.yscale('log')  # Log scale to better visualize different market caps
plt.tight_layout()
plt.show()

# Calculate and display correlation matrix based on prices
price_data = pd.DataFrame()
for symbol, df in all_data.items():
    price_data[symbol] = df.set_index('Date')['price']

correlation_matrix = price_data.corr()
print("\nCorrelation Matrix between Cryptocurrencies (CoinGecko):")
print(correlation_matrix)

print("\nAll CoinGecko cryptocurrency data has been processed and uploaded to S3!")
print(f"S3 Bucket: {S3_BUCKET}")
print(f"S3 Prefix: {S3_PREFIX}")