In [1]:


import pandas as pd
import hopsworks
from utils import *
import json
import os
import warnings
from dotenv import load_dotenv
import datetime
#from datetime import timezone

# Silence every Python warning for the rest of the session so cell output stays compact.
# NOTE(review): this also hides deprecation warnings raised by the imported libraries.
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is absent. The previous
# `os.environ[...] = os.getenv(...)` round-trip was a no-op when the key was
# set and raised an opaque TypeError (str expected, not NoneType) when it was not.
# NOTE(review): hopsworks.login() is presumed to read HOPSWORKS_API_KEY from the
# environment — confirm against the Hopsworks client documentation.
if os.getenv("HOPSWORKS_API_KEY") is None:
    raise RuntimeError(
        "HOPSWORKS_API_KEY is not set; define it in the environment or in the .env file."
    )

project = hopsworks.login()

2024-12-22 14:32:48,669 INFO: Initializing external client
2024-12-22 14:32:48,669 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-22 14:32:50,138 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160346


In [3]:

# Handle to the project's feature store; every feature group below is fetched through it.
fs = project.get_feature_store()
#secrets = secrets_api(project.name)

# This line will fail if you have not registered the CRYPTO_API_KEY as a secret in Hopsworks
#CRYPTO_API_KEY = secrets.get_secret("CRYPTO_API_KEY").value

# API key for the price requests, read from the environment (.env is loaded earlier).
CRYPTO_API_KEY = os.getenv("CRYPTO_API_KEY")

# Take "today" in UTC: the dates stored in the feature groups are UTC timestamps,
# so a local-time date could be one day off around midnight and skew the
# day-difference calculations further down.
today = datetime.datetime.now(datetime.timezone.utc).date()

today


datetime.date(2024, 12, 22)

In [4]:

# Look up version 7 of the "solana" feature group.
solana_feature_group = fs.get_feature_group(name="solana", version=7)

# Only the "date" column is read; nothing else is needed to find the newest entry.
s_data_df = solana_feature_group.select(["date"]).read()

# Newest date currently stored for Solana.
solana_max_date = s_data_df["date"].max()

print(f"The maximum date is: {solana_max_date}")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.72s) 
The maximum date is: 2024-12-22 00:00:00+00:00


In [5]:

# Look up version 7 of the "bitcoin" feature group.
bitcoin_feature_group = fs.get_feature_group(name="bitcoin", version=7)

# Only the "date" column is read; nothing else is needed to find the newest entry.
b_data_df = bitcoin_feature_group.select(["date"]).read()

# Newest date currently stored for Bitcoin.
bitcoin_max_date = b_data_df["date"].max()

print(f"The maximum date is: {bitcoin_max_date}")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.88s) 
The maximum date is: 2024-12-22 00:00:00+00:00


In [6]:
# Look up version 6 of the "f_n_g_index" (fear and greed index) feature group.
fng_feature_group = fs.get_feature_group(name="f_n_g_index", version=6)

# Only the "date" column is read; nothing else is needed to find the newest entry.
fng_data_df = fng_feature_group.select(["date"]).read()

# Newest date currently stored for the fear and greed index.
fng_max_date = fng_data_df["date"].max()

print(f"The maximum date is: {fng_max_date}")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.65s) 
The maximum date is: 2024-12-22 00:00:00+00:00


### Getting missing data

### Request Solana prices from the last stored date until now

In [7]:
# Reduce the newest stored timestamp to a plain calendar date (no time, no
# timezone) so that it can be subtracted from `today`.
solana_max_date_naive = solana_max_date.date()

# Whole days between the newest stored data point and today.
solana_difference = (today - solana_max_date_naive).days

print(f"Difference in days: {solana_difference}")

Difference in days: 0


In [8]:
# Daily-history endpoint used for the Solana price request.
url = "https://min-api.cryptocompare.com/data/v2/histoday"

# Read the .env file again and fetch the API key from the environment.
load_dotenv()
crypto_api_key = os.getenv("CRYPTO_API_KEY")

# Query parameters sent with the request.
params = dict(
    fsym="SOL",                 # symbol to price (Solana)
    tsym="USD",                 # currency the price is quoted in (US Dollar)
    limit=solana_difference,    # number of days of data to retrieve
    toTs=None,                  # end-of-period timestamp; None asks for the latest data
    api_key=crypto_api_key,
)

sol_response = trigger_request(url, params)
sol_response.keys()



In [9]:
# Show the "Data" entry of the Solana response for a quick visual check.
sol_response["Data"]

{}

### Request Bitcoin prices from the last stored date until now

In [10]:
# Reduce the newest stored timestamp to a plain calendar date (no time, no
# timezone) so that it can be subtracted from `today`.
bitcoin_max_date_naive = bitcoin_max_date.date()

# Whole days between the newest stored data point and today.
bitcoin_difference = (today - bitcoin_max_date_naive).days

print(f"Difference in days: {bitcoin_difference}")

Difference in days: 0


In [11]:
# Daily-history endpoint used for the Bitcoin price request.
url = "https://min-api.cryptocompare.com/data/v2/histoday"

# Read the .env file again and fetch the API key from the environment.
load_dotenv()
crypto_api_key = os.getenv("CRYPTO_API_KEY")

# Query parameters sent with the request.
params = dict(
    fsym="BTC",                  # symbol to price (Bitcoin)
    tsym="USD",                  # currency the price is quoted in (US Dollar)
    limit=bitcoin_difference,    # number of days of data to retrieve
    toTs=None,                   # end-of-period timestamp; None asks for the latest data
    api_key=crypto_api_key,
)

btc_response = trigger_request(url, params)
btc_response.keys()



In [12]:
# Show the "Data" entry of the Bitcoin response for a quick visual check.
btc_response["Data"]

{}

### Request Fear and Greed Index values from the last stored date until now

In [13]:

# Reduce the newest stored timestamp to a plain calendar date (no time, no
# timezone) so that it can be subtracted from `today`.
fng_max_date_naive = fng_max_date.date()

# Whole days between the newest stored data point and today.
fng_difference = (today - fng_max_date_naive).days

print(f"Difference in days: {fng_difference}")

Difference in days: 0


In [14]:
# Endpoint used for the fear and greed index request.
url = "https://api.alternative.me/fng/?"

# Only `limit` is sent with this request, so no API key is loaded here (the
# previous version read CRYPTO_API_KEY but never used it).
#
# The alternative.me API documents `limit=0` as "all available data". When no
# day is missing (difference of 0) that would download the full history for
# nothing, so at least one entry is always requested instead. The insert step
# is gated on `fng_difference > 0`, so the extra entry is never written.
params = {
    "limit": max(fng_difference, 1)    # Number of days of data to retrieve
}

fng_response = trigger_request(url, params)
fng_response.keys()

dict_keys(['name', 'data', 'metadata'])

### Now we need to use the responses to update the data in Hopsworks

In [15]:


# Gather the Solana "open" prices that are newer than the newest stored date
# and append them to the Solana feature group.
#
# Rows are accumulated in a list and turned into a DataFrame once, instead of
# growing a DataFrame with pd.concat on every iteration.
solana_new_rows = []

if solana_difference > 0:
    for entry in sol_response["Data"]["Data"]:
        # "time" is converted as a Unix timestamp into a timezone-aware UTC
        # datetime so it can be compared with the timezone-aware solana_max_date.
        # `datetime` is the module here, hence datetime.datetime.fromtimestamp
        # (calling fromtimestamp on the module raises AttributeError).
        date = datetime.datetime.fromtimestamp(entry["time"], datetime.timezone.utc)

        if date <= solana_max_date:
            # Already present in the feature group.
            continue

        solana_new_rows.append({'date': date, 'open': entry["open"]})

# Same columns as before; the frame is simply empty when nothing new was found.
solana_new_data = pd.DataFrame(solana_new_rows, columns=['date', 'open'])

# Only write to the feature group when there is something to append.
if not solana_new_data.empty:
    solana_feature_group.insert(solana_new_data)



In [16]:
# Gather the Bitcoin "open" prices that are newer than the newest stored date
# and append them to the Bitcoin feature group.
#
# Rows are accumulated in a list and turned into a DataFrame once, instead of
# growing a DataFrame with pd.concat on every iteration.
bitcoin_new_rows = []

if bitcoin_difference > 0:
    for entry in btc_response["Data"]["Data"]:
        # "time" is converted as a Unix timestamp into a timezone-aware UTC
        # datetime so it can be compared with the timezone-aware bitcoin_max_date.
        date = datetime.datetime.fromtimestamp(entry["time"], datetime.timezone.utc)

        if date <= bitcoin_max_date:
            # Already present in the feature group.
            continue

        # The open price is stored as a string, exactly as before.
        # NOTE(review): presumably this matches the feature group's schema — confirm.
        bitcoin_new_rows.append({'date': date, 'open': str(entry["open"])})

# Same columns as before; the frame is simply empty when nothing new was found.
bitcoin_new_data = pd.DataFrame(bitcoin_new_rows, columns=['date', 'open'])

# Only write to the feature group when there is something to append.
if not bitcoin_new_data.empty:
    bitcoin_feature_group.insert(bitcoin_new_data)


In [17]:
# Gather the fear and greed index entries that are newer than the newest stored
# date and append them to the fear and greed feature group.
#
# Rows are accumulated in a list and turned into a DataFrame once, instead of
# growing a DataFrame with pd.concat on every iteration.
fng_new_rows = []

if fng_difference > 0:
    for entry in fng_response["data"]:
        # "timestamp" is passed through int() and converted as a Unix timestamp
        # into a timezone-aware UTC datetime for comparison with fng_max_date.
        date = datetime.datetime.fromtimestamp(int(entry["timestamp"]), datetime.timezone.utc)

        if date <= fng_max_date:
            print("Data already exists")
            continue

        print("Data does not exist")
        fng_new_rows.append({
            'date': date,
            'fng_value': entry["value"],
            'fng_classification': entry["value_classification"],
        })

# Same columns as before; the frame is simply empty when nothing new was found.
fng_new_data = pd.DataFrame(fng_new_rows, columns=['date', 'fng_value', 'fng_classification'])

# Write the fear and greed rows. The previous version passed bitcoin_new_data
# here, which pushed Bitcoin prices into the fear and greed feature group.
if not fng_new_data.empty:
    fng_feature_group.insert(fng_new_data)