In [1]:
# Import necessary libraries
import pandas as pd               # For data manipulation using DataFrames
import numpy as np                # For numerical operations
import matplotlib.pyplot as plt   # For data visualization
import os                         # For operating system-related tasks
import joblib                     # For saving and loading models
import hopsworks                  # For getting access to hopsworks
import re

# Import specific modules from scikit-learn
from sklearn.preprocessing import StandardScaler, OneHotEncoder   # For data preprocessing
from sklearn.metrics import accuracy_score                        # For evaluating model accuracy

from dotenv import load_dotenv
import os
load_dotenv()

# Log in to Hopsworks using the API key read from the 'hopsworks_api'
# environment variable, then obtain a handle to the project's feature store.
api_key = os.getenv('hopsworks_api')
project = hopsworks.login(api_key_value=api_key)
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/564374
Connected. Call `.close()` to terminate connection gracefully.


In [2]:
# Read the NVDA price history from CSV and print the first five rows
# as a quick check that the file loaded as expected.
nvda_df = pd.read_csv(filepath_or_buffer='NVDA_stock_prices.csv')
print(nvda_df.head(n=5))

         date  1. open  2. high    3. low  4. close    5. volume ticker
0  2024-08-02   103.76   108.72  101.3700    107.27  482027464.0   NVDA
1  2024-08-01   117.53   120.16  106.8104    109.21  523462326.0   NVDA
2  2024-07-31   112.90   118.34  110.8800    117.02  473174182.0   NVDA
3  2024-07-30   111.52   111.99  102.5400    103.73  486833274.0   NVDA
4  2024-07-29   113.69   116.28  111.3000    111.59  248152068.0   NVDA


In [3]:
# Helper that reduces a raw column label to its ASCII letters only
def clean_column_name(name):
    """Return ``name`` with every character that is not an ASCII letter removed.

    Digits, whitespace, punctuation and underscores are all dropped, so a
    label such as ``"1. open"`` becomes ``"open"``. Letter case is preserved.

    Args:
        name: The column label to clean (a string).

    Returns:
        The label containing only the characters a-z and A-Z, in their
        original order; an empty string if no letters are present.
    """
    non_letters = re.compile(r'[^a-zA-Z]')
    return non_letters.sub('', name)

In [4]:
# Display the raw frame (pandas renders a truncated head/tail view) before the column names are cleaned
nvda_df


Unnamed: 0,date,1. open,2. high,3. low,4. close,5. volume,ticker
0,2024-08-02,103.76,108.72,101.3700,107.27,482027464.0,NVDA
1,2024-08-01,117.53,120.16,106.8104,109.21,523462326.0,NVDA
2,2024-07-31,112.90,118.34,110.8800,117.02,473174182.0,NVDA
3,2024-07-30,111.52,111.99,102.5400,103.73,486833274.0,NVDA
4,2024-07-29,113.69,116.28,111.3000,111.59,248152068.0,NVDA
...,...,...,...,...,...,...,...
6223,1999-11-05,30.00,30.25,26.8800,28.25,1284100.0,NVDA
6224,1999-11-04,27.75,29.94,27.7500,29.19,2625700.0,NVDA
6225,1999-11-03,26.00,28.13,25.8100,27.44,4191000.0,NVDA
6226,1999-11-02,23.94,25.13,23.7500,25.00,1744800.0,NVDA


In [5]:
# Replace every column label of 'nvda_df' with its letters-only form
# (e.g. '1. open' -> 'open') and print the resulting labels to confirm.
nvda_df.columns = list(map(clean_column_name, nvda_df.columns))
print(nvda_df.columns)

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')


In [6]:
# Parse the 'date' column into pandas datetime values
nvda_df['date'] = nvda_df['date'].pipe(pd.to_datetime)

In [14]:
# Get (or create on first use) version 1 of the offline-only feature group
# that holds the Nvidia stock price data.
# NOTE(review): the rows shown in this notebook all share the same 'ticker'
# value, so the primary key alone does not identify a row — confirm whether
# 'date' should be part of the key before creating a new version.
nvidia_fg = fs.get_or_create_feature_group(
    name="nvidia_stock",
    description="Nvidia stock dataset from alpha vantage",
    version=1,
    primary_key=["ticker"],
    # event_time expects a single feature name; the one-element list form is deprecated.
    event_time="date",
    online_enabled=False,
)




In [15]:
# Upload the stock rows into the stocks feature group. wait_for_job=False asks
# the client not to block until the materialization job has finished.
nvidia_fg.insert(
    nvda_df,
    write_options=dict(wait_for_job=False),
)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/564374/fs/560197/fg/1072466


Uploading Dataframe: 0.00% |          | Rows 0/6228 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: nvidia_stock_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/564374/jobs/named/nvidia_stock_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x1843feaca10>, None)

In [16]:
# Load the news articles data from CSV
news_df = pd.read_csv(filepath_or_buffer='news_articles_ema.csv')

In [17]:
# Remove the 'exp_mean_7_days' column; drop() returns a new frame, so news_df itself is unchanged
news_df_updated = news_df.drop('exp_mean_7_days', axis='columns')

In [18]:
# Parse the 'date' column into pandas datetime values
news_df_updated['date'] = news_df_updated['date'].pipe(pd.to_datetime)

In [20]:
# Get (or create on first use) version 1 of the offline-only feature group
# that holds the news sentiment data.
news_sentiment_fg = fs.get_or_create_feature_group(
    name='news_sentiment_updated',
    description='News sentiment from Polygon',
    version=1,
    primary_key=['ticker'],
    # event_time expects a single feature name; the one-element list form is deprecated.
    event_time='date',
    online_enabled=False,
)




In [21]:
# Upload the news rows into the news feature group; no write options are passed, so the client's defaults apply
news_sentiment_fg.insert(news_df_updated)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/564374/fs/560197/fg/1073487


Uploading Dataframe: 0.00% |          | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: news_sentiment_updated_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/564374/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x18440207110>, None)