In [None]:
import pandas as pd
import json
from datetime import datetime, timedelta
import os

In [None]:
# Directory containing all token data files
data_dir = 'market caps'


def _load_market_cap_points(file_path):
    """Read one token file and return its usable market-cap observations.

    The file is parsed as JSON and the points are taken from
    ``data -> series[0] -> points``, loaded as two columns:
    ``timestamp`` and ``market_cap``.

    Rows whose market cap is zero or missing are dropped, the timestamps are
    converted from epoch seconds to datetimes, and the rows are ordered
    chronologically (stable sort, so ties keep their file order).

    Returns:
        DataFrame with columns ``timestamp`` (datetime64) and ``market_cap``;
        empty when the file has no usable observation.

    Raises:
        KeyError / IndexError: if the JSON does not have the expected layout.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        payload = json.load(file)

    points = pd.DataFrame(
        payload['data']['series'][0]['points'],
        columns=['timestamp', 'market_cap'],
    )

    # Take an explicit copy of the filtered rows so that the column assignment
    # below writes to an independent frame (avoids SettingWithCopyWarning).
    usable = points['market_cap'].notna() & (points['market_cap'] != 0)
    points = points.loc[usable].copy()

    points['timestamp'] = pd.to_datetime(points['timestamp'], unit='s')

    # The hourly expansion walks consecutive pairs of rows, so it needs them
    # in chronological order.
    return points.sort_values('timestamp', kind='stable').reset_index(drop=True)


def _expand_to_hourly(points, token_name):
    """Expand sparse observations into one row per hour.

    For every pair of consecutive observations, rows are emitted at hourly
    steps starting at the earlier timestamp and stopping before the later one.
    Each of those rows carries the market cap of the *later* observation.
    A final row is appended for the last observation itself.

    NOTE(review): filling an interval with the following observation's value
    is a back-fill and therefore uses information from the future relative to
    each hour -- confirm this is intended for downstream use.

    Args:
        points: non-empty DataFrame with ``timestamp`` and ``market_cap``
            columns, in chronological order.
        token_name: label stored in the ``token`` column of every row.

    Returns:
        DataFrame with columns ``timestamp``, ``market_cap`` and ``token``.
    """
    timestamps = points['timestamp'].tolist()
    market_caps = points['market_cap'].tolist()

    records = []
    for start, end, next_cap in zip(timestamps, timestamps[1:], market_caps[1:]):
        current_time = start
        while current_time < end:
            records.append({'timestamp': current_time, 'market_cap': next_cap, 'token': token_name})
            current_time += timedelta(hours=1)

    # Add the last observation itself, which the pairwise walk never emits.
    records.append({'timestamp': timestamps[-1], 'market_cap': market_caps[-1], 'token': token_name})

    return pd.DataFrame(records)


# Hourly frames, one per token file
all_tokens_data = []

# os.listdir returns entries in arbitrary order; sort for a reproducible result.
for file_name in sorted(os.listdir(data_dir)):
    if not file_name.endswith('.txt'):
        continue

    # The raw file name (including extension) is used as the token label.
    token_name = file_name
    points = _load_market_cap_points(os.path.join(data_dir, file_name))

    if points.empty:
        # Nothing to expand; skip instead of failing on the last-row lookup.
        print(f"Skipped {token_name}: no non-zero market cap data points.")
        continue

    all_tokens_data.append(_expand_to_hourly(points, token_name))

    print(f"Processed hourly market cap data for {token_name} .")

if not all_tokens_data:
    raise ValueError(f"No usable '.txt' market cap files found in {data_dir!r}.")

# Combine all tokens data into a single DataFrame
combined_df = pd.concat(all_tokens_data, ignore_index=True)

Processed hourly market cap data for 1inchusdt.txt .
Processed hourly market cap data for balusdt.txt .
Processed hourly market cap data for batusdt.txt .
Processed hourly market cap data for crvusdt.txt .
Processed hourly market cap data for enjusdt.txt .
Processed hourly market cap data for ensusdt.txt .
Processed hourly market cap data for kncusdt.txt .
Processed hourly market cap data for linkusdt.txt .
Processed hourly market cap data for manausdt.txt .
Processed hourly market cap data for mkrusdt.txt .
Processed hourly market cap data for renusdt.txt .
Processed hourly market cap data for snxusdt.txt .
Processed hourly market cap data for uniusdt.txt .
Processed hourly market cap data for wbtcusdt.txt .
Processed hourly market cap data for yfiusdt.txt .
Processed hourly market cap data for zrxusdt.txt .


In [20]:
def token_clean(name):
    """Return ``name`` with every '.txt' occurrence removed, in upper case."""
    return name.replace('.txt', '').upper()

# Normalise the token labels element by element with token_clean.
combined_df['token'] = combined_df['token'].map(token_clean)

In [21]:
# Per-token check: print the label, its row count, and its earliest and latest timestamp.
# NOTE(review): the original note here read "ens, knc, uni, yfi" -- presumably tokens to inspect; confirm.
for token in combined_df['token'].unique():
    token_timestamps = combined_df.loc[combined_df['token'] == token, 'timestamp']
    print(token)
    print(len(token_timestamps))
    print(min(token_timestamps))
    print(max(token_timestamps))

1INCHUSDT
34777
2020-12-21 00:00:00
2024-12-09 00:00:00
BALUSDT
39145
2020-06-22 00:00:00
2024-12-09 00:00:00
BATUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
CRVUSDT
37969
2020-08-10 00:00:00
2024-12-09 00:00:00
ENJUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
ENSUSDT
27049
2021-11-08 00:00:00
2024-12-09 00:00:00
KNCUSDT
29569
2021-07-26 00:00:00
2024-12-09 00:00:00
LINKUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
MANAUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
MKRUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
RENUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
SNXUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00
UNIUSDT
37129
2020-09-14 00:00:00
2024-12-09 00:00:00
WBTCUSDT
37465
2020-08-31 00:00:00
2024-12-09 00:00:00
YFIUSDT
38641
2020-07-13 00:00:00
2024-12-09 00:00:00
ZRXUSDT
43681
2019-12-16 00:00:00
2024-12-09 00:00:00


In [24]:
# Display summary statistics of the combined frame as the cell's output.
combined_df.describe()

Unnamed: 0,timestamp,market_cap
count,631192,631192.0
mean,2022-08-26 00:07:36.439244544,1750222000.0
min,2019-12-16 00:00:00,23293290.0
25%,2021-07-17 15:00:00,257078500.0
50%,2022-09-09 04:00:00,527345300.0
75%,2023-10-25 02:00:00,1422480000.0
max,2024-12-09 00:00:00,22150670000.0
std,,2954620000.0


In [25]:
# Write the combined frame to 'market_cap_data.csv' (path relative to the working directory).
# NOTE(review): index=False is not passed, so the row index is presumably written as an
# unnamed first column -- confirm downstream readers expect that.
combined_df.to_csv('market_cap_data.csv')

### End of Notebook