In [25]:
# Import necessary libraries
import pandas as pd               # For data manipulation using DataFrames
import numpy as np                # For numerical operations
import matplotlib.pyplot as plt   # For data visualization
import os                         # For operating system-related tasks
import joblib                     # For saving and loading models
import hopsworks                  # For getting access to hopsworks
import re

# Import specific modules from scikit-learn
from sklearn.preprocessing import StandardScaler, OneHotEncoder   # For data preprocessing
from sklearn.metrics import accuracy_score                        # For evaluating model accuracy

from dotenv import load_dotenv
import os
load_dotenv()

# Connect to Hopsworks using the API key stored in the 'hopsworks_api' environment variable
api_key = os.getenv('hopsworks_api')
project = hopsworks.login(api_key_value=api_key)

# Feature store handle for the logged-in project
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.



Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399
Connected. Call `.close()` to terminate connection gracefully.


In [26]:
# Read the TSLA price CSV into a DataFrame and preview its first rows as a sanity check
tsla_df = pd.read_csv('TSLA_stock_price.csv')
print(tsla_df.head(n=5))

         date  1. open  2. high    3. low  4. close    5. volume ticker
0  2024-05-03   182.10   184.78  178.4200    181.19   75491539.0   TSLA
1  2024-05-02   182.86   184.60  176.0200    180.01   89148041.0   TSLA
2  2024-05-01   182.00   185.86  179.0100    179.99   92829719.0   TSLA
3  2024-04-30   186.98   190.95  182.8401    183.28  127031787.0   TSLA
4  2024-04-29   188.42   198.87  184.5400    194.05  243869678.0   TSLA


In [27]:
# Helper used to normalise DataFrame column labels
def clean_column_name(name):
    """Return ``name`` stripped of every character that is not an ASCII letter.

    Digits, whitespace, punctuation and underscores are all removed, so a label
    such as ``'1. open'`` becomes ``'open'``.

    Args:
        name: The column label to clean (a string).

    Returns:
        The label with only the characters ``a-z`` and ``A-Z`` kept, in order.
    """
    return re.sub(r'[^a-zA-Z]', '', name)

In [28]:
# Display the DataFrame (pandas elides the middle rows) to inspect the raw column names
tsla_df

Unnamed: 0,date,1. open,2. high,3. low,4. close,5. volume,ticker
0,2024-05-03,182.10,184.7800,178.4200,181.19,75491539.0,TSLA
1,2024-05-02,182.86,184.6000,176.0200,180.01,89148041.0,TSLA
2,2024-05-01,182.00,185.8600,179.0100,179.99,92829719.0,TSLA
3,2024-04-30,186.98,190.9500,182.8401,183.28,127031787.0,TSLA
4,2024-04-29,188.42,198.8700,184.5400,194.05,243869678.0,TSLA
...,...,...,...,...,...,...,...
3481,2010-07-06,20.00,20.0000,15.8300,16.11,6866900.0,TSLA
3482,2010-07-02,23.00,23.1000,18.7100,19.20,5139800.0,TSLA
3483,2010-07-01,25.00,25.9200,20.2700,21.96,8218800.0,TSLA
3484,2010-06-30,25.79,30.4192,23.3000,23.83,17187100.0,TSLA


In [30]:
# Run every column label of 'tsla_df' through clean_column_name, then show the result
tsla_df.columns = list(map(clean_column_name, tsla_df.columns))
print(tsla_df.columns)

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')


In [31]:
# Converting the "date" column (read from the CSV as text) to pandas timestamps;
# this column is later named as the event_time of the stock feature group
tsla_df['date'] = pd.to_datetime(tsla_df['date'])

In [32]:
# Get (or create on first use) version 1 of the offline-only feature group that
# holds the Tesla stock price rows.
# NOTE(review): the rows previewed above all share the same "ticker" value — confirm
# that the primary key together with event_time keeps rows distinct on insert.
tesla_fg = fs.get_or_create_feature_group(
    name="tesla_stock",
    description="Tesla stock dataset from alpha vantage",
    version=1,
    primary_key=["ticker"],
    # event_time takes the name of a single feature, so pass it as a string
    # rather than wrapping it in a one-element list.
    event_time="date",
    online_enabled=False,
)

In [18]:
# Upload the stock rows to the stocks feature group without waiting for the
# materialization job to finish
tesla_fg.insert(
    tsla_df,
    write_options={"wait_for_job": False},
)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/786781


Uploading Dataframe: 0.00% |          | Rows 0/3486 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: tesla_stock_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x19cffe27490>, None)

In [19]:
# Load the news dataset from 'news_articles_ema.csv' into a DataFrame
news_df = pd.read_csv('news_articles_ema.csv')

In [20]:
# Build a new frame from the news data without the 'exp_mean_7_days' column
news_df_updated = news_df.drop('exp_mean_7_days', axis=1)

In [21]:
# Convert the "date" column (read from the CSV as text) to pandas timestamps;
# this column is later named as the event_time of the news feature group
news_df_updated['date'] = pd.to_datetime(news_df_updated['date'])

In [22]:
# Get (or create on first use) version 1 of the offline-only feature group that
# holds the news sentiment rows.
# NOTE(review): "ticker" alone is the primary key — confirm that it, together with
# event_time, keeps the news rows distinct on insert.
news_sentiment_fg = fs.get_or_create_feature_group(
    name='news_sentiment_updated',
    description='News sentiment from Polygon',
    version=1,
    primary_key=['ticker'],
    # event_time takes the name of a single feature, so pass it as a string
    # rather than wrapping it in a one-element list.
    event_time='date',
    online_enabled=False,
)




In [23]:
# Insert the news rows into the news feature group, using the default write options
news_sentiment_fg.insert(news_df_updated)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/787796


Uploading Dataframe: 0.00% |          | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: news_sentiment_updated_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x19c811c2e90>, None)