In [1]:
pip install yfinance --upgrade --no-cache-dir

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install alpha-vantage

Note: you may need to restart the kernel to use updated packages.


## YAHOO FINANCE

In [3]:
# Import relevant libraries
import os
import numpy as np
import pandas as pd
import yfinance as yf
import requests
from datetime import datetime, timedelta

In [4]:
# Set tickers
ticker_target = 'NVDA'
ticker_comp = 'AMD 005930.KS INTC' # AMD, Samsung, Intel
ticker_supp = 'TSM ASML 000660.KS MU' # TSMC, ASML, SK Hynix
ticker_other = 'AMZN MSFT GOOG' # Amazon, Microsoft, Google
ticker_mkt = '^IXIC ^GSPC ^SOX'# Indices - Nasdaq composite, S&P500, PHLX Semiconductor
ticker_all  = " ".join([ticker_target, ticker_comp, ticker_supp, ticker_other, ticker_mkt])

# Print to check 
print(ticker_all)

NVDA AMD 005930.KS INTC TSM ASML 000660.KS MU AMZN MSFT GOOG ^IXIC ^GSPC ^SOX


In [5]:
# Set end date
project_start = datetime(2024, 5, 8).date()
end_date = project_start.strftime('%Y-%m-%d')

# Set start date
timeframe = 365 * 5 # Number of days 
start = project_start - timedelta(days=timeframe)
start_date = start.strftime('%Y-%m-%d')

# Print to check
print(end_date)
print(start_date)

2024-05-08
2019-05-10


### PRICE

In [6]:
# Load stock data into a DataFrame
df = yf.download(ticker_all, start=start_date, end=end_date)

# Print to check
print(df.tail())
print(df.info())

[*********************100%%**********************]  14 of 14 completed

Price      Adj Close                                                \
Ticker     000660.KS 005930.KS         AMD        AMZN        ASML   
Date                                                                 
2024-05-01       NaN       NaN  144.270004  179.000000  852.840027   
2024-05-02  173600.0   78000.0  146.160004  184.720001  870.280029   
2024-05-03  173200.0   77600.0  150.600006  186.210007  901.630005   
2024-05-06       NaN       NaN  155.779999  188.699997  916.919983   
2024-05-07  179600.0   81300.0  154.429993  188.759995  908.219971   

Price                                                                  ...  \
Ticker            GOOG       INTC        MSFT          MU        NVDA  ...   
Date                                                                   ...   
2024-05-01  165.570007  30.247145  394.940002  109.699997  830.409973  ...   
2024-05-02  168.460007  30.386578  397.839996  112.330002  858.169983  ...   
2024-05-03  168.990005  30.775000  406.660004  11




In [7]:
# Create new DataFrame with only close prices
new_df = df['Adj Close']

# Print to check
print(new_df.tail())
print(new_df.info())

Ticker      000660.KS  005930.KS         AMD        AMZN        ASML  \
Date                                                                   
2024-05-01        NaN        NaN  144.270004  179.000000  852.840027   
2024-05-02   173600.0    78000.0  146.160004  184.720001  870.280029   
2024-05-03   173200.0    77600.0  150.600006  186.210007  901.630005   
2024-05-06        NaN        NaN  155.779999  188.699997  916.919983   
2024-05-07   179600.0    81300.0  154.429993  188.759995  908.219971   

Ticker            GOOG       INTC        MSFT          MU        NVDA  \
Date                                                                    
2024-05-01  165.570007  30.247145  394.940002  109.699997  830.409973   
2024-05-02  168.460007  30.386578  397.839996  112.330002  858.169983   
2024-05-03  168.990005  30.775000  406.660004  114.699997  887.890015   
2024-05-06  169.830002  30.969999  413.540009  120.129997  921.400024   
2024-05-07  172.979996  30.680000  409.339996  119.209999

In [8]:
# Forward fill new_df
new_df = new_df.ffill(axis=0)

In [9]:
# Create columns with daily price perc change using for loop
for column in new_df.columns:
    new_df[f'{column}_chg'] = new_df[column].pct_change() * 100
    
# Print new_df to check
print(new_df.info())
print(new_df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1296 entries, 2019-05-10 to 2024-05-07
Data columns (total 28 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   000660.KS      1296 non-null   float64
 1   005930.KS      1296 non-null   float64
 2   AMD            1296 non-null   float64
 3   AMZN           1296 non-null   float64
 4   ASML           1296 non-null   float64
 5   GOOG           1296 non-null   float64
 6   INTC           1296 non-null   float64
 7   MSFT           1296 non-null   float64
 8   MU             1296 non-null   float64
 9   NVDA           1296 non-null   float64
 10  TSM            1296 non-null   float64
 11  ^GSPC          1296 non-null   float64
 12  ^IXIC          1296 non-null   float64
 13  ^SOX           1296 non-null   float64
 14  000660.KS_chg  1295 non-null   float64
 15  005930.KS_chg  1295 non-null   float64
 16  AMD_chg        1295 non-null   float64
 17  AMZN_chg       1295 non-null   flo

In [10]:
# Reset index of new_df
new_df.reset_index(inplace=True)

# Print to check
print(new_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 29 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1296 non-null   datetime64[ns]
 1   000660.KS      1296 non-null   float64       
 2   005930.KS      1296 non-null   float64       
 3   AMD            1296 non-null   float64       
 4   AMZN           1296 non-null   float64       
 5   ASML           1296 non-null   float64       
 6   GOOG           1296 non-null   float64       
 7   INTC           1296 non-null   float64       
 8   MSFT           1296 non-null   float64       
 9   MU             1296 non-null   float64       
 10  NVDA           1296 non-null   float64       
 11  TSM            1296 non-null   float64       
 12  ^GSPC          1296 non-null   float64       
 13  ^IXIC          1296 non-null   float64       
 14  ^SOX           1296 non-null   float64       
 15  000660.KS_chg  1295 n

### ANALYST UP/DOWNGRADES

In [11]:
# Set ticker to NVDA
ticker = yf.Ticker('NVDA')

In [12]:
# Get upgrades/downgrades data
updown_df = ticker.upgrades_downgrades

# Print to check
print(updown_df)

                                   Firm      ToGrade       FromGrade Action
GradeDate                                                                  
2024-05-10 15:09:21                HSBC          Buy             Buy   main
2024-05-07 12:00:45       Goldman Sachs          Buy             Buy   main
2024-04-30 15:23:13                 UBS          Buy             Buy   main
2024-04-16 09:19:10  Evercore ISI Group   Outperform                   init
2024-04-11 17:08:47       Raymond James   Strong Buy      Strong Buy   main
...                                 ...          ...             ...    ...
2012-05-14 10:19:00           Jefferies         Hold                   main
2012-04-05 11:07:00   Cantor Fitzgerald          Buy                   init
2012-02-16 12:29:00  Evercore ISI Group  Underweight                   main
2012-02-16 11:34:00   B of A Securities      Neutral             Buy   down
2012-02-13 12:27:00         FBR Capital   Outperform  Market Perform     up

[720 rows x

In [13]:
# Check unique values of Action column
updown_df['Action'].unique()

array(['main', 'init', 'reit', 'down', 'up'], dtype=object)

In [14]:
# Count the rows that are not upgrades or downgrades
count = updown_df[(updown_df['Action'] == 'main') | (updown_df['Action'] == 'init') | (updown_df['Action'] == 'reit')].shape[0]
print(count)

619


In [15]:
# Drop rows that are not upgrades or downgrades
fupdown_df = updown_df.drop(updown_df[(updown_df['Action'] == 'main') | (updown_df['Action'] == 'init') | (updown_df['Action'] == 'reit')].index)

# Print to check
print(fupdown_df.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 101 entries, 2023-11-22 16:59:10 to 2012-02-13 12:27:00
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Firm       101 non-null    object
 1   ToGrade    101 non-null    object
 2   FromGrade  101 non-null    object
 3   Action     101 non-null    object
dtypes: object(4)
memory usage: 3.9+ KB
None


In [16]:
# Update values of Action column to integers
fupdown_df['Action'] = fupdown_df['Action'].replace({'up': 1, 'down': -1})

# Print to check
print(fupdown_df.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 101 entries, 2023-11-22 16:59:10 to 2012-02-13 12:27:00
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Firm       101 non-null    object
 1   ToGrade    101 non-null    object
 2   FromGrade  101 non-null    object
 3   Action     101 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 3.9+ KB
None


In [17]:
# Drop irrelevant columns
irr_col = ['Firm', 'ToGrade', 'FromGrade']
fupdown_df = fupdown_df.drop(irr_col, axis = 1)

# Reset index of fupdown_df
fupdown_df.reset_index(inplace=True)

# Rename column
fupdown_df.rename(columns={'GradeDate': 'Date'}, inplace=True)

# Change Date to exclude time
fupdown_df['Date'] = fupdown_df['Date'].dt.normalize()

# Print to check
print(fupdown_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    101 non-null    datetime64[ns]
 1   Action  101 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 1.7 KB
None


In [18]:
# Combine Action values for each date
fupdown_df = fupdown_df.groupby('Date')['Action'].sum().reset_index()

# Print to check
print(fupdown_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    92 non-null     datetime64[ns]
 1   Action  92 non-null     int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 1.6 KB
None


In [19]:
# Merge Action column into new_df
new_df = pd.merge(new_df, fupdown_df, on='Date', how='left')

# Print to check
print(new_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 30 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1296 non-null   datetime64[ns]
 1   000660.KS      1296 non-null   float64       
 2   005930.KS      1296 non-null   float64       
 3   AMD            1296 non-null   float64       
 4   AMZN           1296 non-null   float64       
 5   ASML           1296 non-null   float64       
 6   GOOG           1296 non-null   float64       
 7   INTC           1296 non-null   float64       
 8   MSFT           1296 non-null   float64       
 9   MU             1296 non-null   float64       
 10  NVDA           1296 non-null   float64       
 11  TSM            1296 non-null   float64       
 12  ^GSPC          1296 non-null   float64       
 13  ^IXIC          1296 non-null   float64       
 14  ^SOX           1296 non-null   float64       
 15  000660.KS_chg  1295 n

## BARCHART

### INSIDER TRANSACTIONS

In [20]:
# Get directory
current_dir = os.getcwd()

# Set folder path
folder_path = os.path.join(current_dir, 'raw')

# Create the file path
file_path_ins = os.path.join(folder_path, 'nvda_ins_sale.csv')

# Import data from csv
ins_df = pd.read_csv(file_path_ins)

# Print to check
print(ins_df.info())
print(ins_df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 282 entries, 0 to 281
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          282 non-null    object 
 1   Insider Name  282 non-null    object 
 2   Title         180 non-null    object 
 3   Transaction   282 non-null    object 
 4   Shares        282 non-null    int64  
 5   @Price        282 non-null    float64
 6   Trans Total   282 non-null    int64  
 7   Shares After  282 non-null    int64  
 8   %Held After   282 non-null    float64
dtypes: float64(2), int64(3), object(4)
memory usage: 20.0+ KB
None
           Date      Insider Name          Title          Transaction  Shares  \
277  2019-03-20  Timothy S. Teter  Gen Cou & Sec  Sale Post-exercise*    1790   
278  2019-03-20    Dr. Jay K Puri      VP of Ops  Sale Post-exercise*   29312   
279  2019-03-20   Dr Jensen Huang            CEO  Sale Post-exercise*  115359   
280  2019-03-01     Michael Byr

In [21]:
# Convert Date to datetime format
ins_df['Date'] = pd.to_datetime(ins_df['Date'])

# Print to check
print(ins_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 282 entries, 0 to 281
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          282 non-null    datetime64[ns]
 1   Insider Name  282 non-null    object        
 2   Title         180 non-null    object        
 3   Transaction   282 non-null    object        
 4   Shares        282 non-null    int64         
 5   @Price        282 non-null    float64       
 6   Trans Total   282 non-null    int64         
 7   Shares After  282 non-null    int64         
 8   %Held After   282 non-null    float64       
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 20.0+ KB
None


In [22]:
# Combine transaction values for each date
ins_df = ins_df.groupby('Date')['Trans Total'].sum().reset_index()

# Print to check
print(ins_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         155 non-null    datetime64[ns]
 1   Trans Total  155 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 2.6 KB
None


In [23]:
# Merge Trans Total column into new_df
new_df = pd.merge(new_df, ins_df, on='Date', how='left')

# Print to check
print(new_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 31 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1296 non-null   datetime64[ns]
 1   000660.KS      1296 non-null   float64       
 2   005930.KS      1296 non-null   float64       
 3   AMD            1296 non-null   float64       
 4   AMZN           1296 non-null   float64       
 5   ASML           1296 non-null   float64       
 6   GOOG           1296 non-null   float64       
 7   INTC           1296 non-null   float64       
 8   MSFT           1296 non-null   float64       
 9   MU             1296 non-null   float64       
 10  NVDA           1296 non-null   float64       
 11  TSM            1296 non-null   float64       
 12  ^GSPC          1296 non-null   float64       
 13  ^IXIC          1296 non-null   float64       
 14  ^SOX           1296 non-null   float64       
 15  000660.KS_chg  1295 n

## ALPHA VANTAGE

### EARNINGS SURPRISE

In [24]:
# Set url to retrieve data
url = 'https://www.alphavantage.co/query?function=EARNINGS&symbol=NVDA&apikey=JI8F8CG68TLZIDYU'

# Retrieve data
r = requests.get(url)
av_data = r.json()

In [25]:
# Check keys
print(av_data.keys())

dict_keys(['symbol', 'annualEarnings', 'quarterlyEarnings'])


In [26]:
# Convert json into a DataFrame
quarterlye_df = pd.DataFrame(av_data['quarterlyEarnings'])

# Print to check
print(quarterlye_df)
print(quarterlye_df.info())

   fiscalDateEnding reportedDate reportedEPS estimatedEPS surprise  \
0        2024-01-31   2024-02-21        5.16         4.29     0.87   
1        2023-10-31   2023-11-21        4.02         3.37     0.65   
2        2023-07-31   2023-08-23         2.7         2.09     0.61   
3        2023-04-30   2023-05-24        1.09         0.92     0.17   
4        2023-01-31   2023-02-22        0.88         0.81     0.07   
..              ...          ...         ...          ...      ...   
95       2000-04-30   2000-05-16        0.01         0.01        0   
96       2000-01-31   2000-02-15        0.01         0.01        0   
97       1999-10-31   1999-11-18        0.01         0.02    -0.01   
98       1999-07-31   1999-08-18        0.02         0.02        0   
99       1999-04-30   1999-05-18        0.02         0.01     0.01   

   surprisePercentage   reportTime  
0             20.2797  post-market  
1             19.2878  post-market  
2             29.1866  post-market  
3          

In [27]:
# Create new DataFrame from quarterlye_df
surprise_df = quarterlye_df[['reportedDate','surprise','surprisePercentage']].copy()

# Rename column
surprise_df.rename(columns={'reportedDate': 'Date'}, inplace=True)

# Convert Date to datetime format
surprise_df['Date'] = pd.to_datetime(surprise_df['Date'])

# Print to check
print(surprise_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                100 non-null    datetime64[ns]
 1   surprise            100 non-null    object        
 2   surprisePercentage  100 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 2.5+ KB
None


In [28]:
# Merge surprise and surprisePercentage into new_df
new_df = pd.merge(new_df, surprise_df, on='Date', how='left')

# Print to check
print(new_df.info())
print(new_df.iloc[1100:1150])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 33 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

## CHART EXCHANGE

### SHORT INTEREST

In [29]:
# Create the file path
file_path_short = os.path.join(folder_path, 'nvda_short_only.csv')

# Import data from csv
short_df = pd.read_csv(file_path_short)

# Print to check
print(short_df.info())
print(short_df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376 entries, 0 to 375
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         376 non-null    int64  
 1   total_short  376 non-null    int64  
 2   perc_short   376 non-null    float64
dtypes: float64(1), int64(2)
memory usage: 8.9 KB
None
         date  total_short  perc_short
371  20221111     29517874    0.618594
372  20221110     26907680    0.574049
373  20221109     18794182    0.594490
374  20221108     27869720    0.608833
375  20221107     19608450    0.635426


In [30]:
# Change data type of date column to string
short_df['date'] = short_df['date'].astype(str)

# Print to check
print(short_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376 entries, 0 to 375
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         376 non-null    object 
 1   total_short  376 non-null    int64  
 2   perc_short   376 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 8.9+ KB
None


In [31]:
# Change data type of date column to date
short_df['date'] = pd.to_datetime(short_df['date'], format='%Y%m%d')
short_df['date'] = pd.to_datetime(short_df['date'], format='%Y-%m-%d')

# Rename date column
short_df.rename(columns={'date': 'Date'}, inplace=True)

# Print to check
print(short_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376 entries, 0 to 375
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         376 non-null    datetime64[ns]
 1   total_short  376 non-null    int64         
 2   perc_short   376 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 8.9 KB
None


In [32]:
# Merge total_short and perc_short into new_df
new_df = pd.merge(new_df, short_df, on='Date', how='left')

# Print to check
print(new_df.info())
print(new_df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 35 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

## MACROTRENDS

### P/E RATIO

In [33]:
# Create the file path
file_path_pe = os.path.join(folder_path, 'nvda_peratio_only.csv')

# Import data from csv
pe_df = pd.read_csv(file_path_pe)

# Print to check
print(pe_df.info())
print(pe_df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      59 non-null     object 
 1   PE Ratio  59 non-null     float64
dtypes: float64(1), object(1)
memory usage: 1.1+ KB
None
          Date  PE Ratio
54  2010-10-31     30.66
55  2010-07-31     21.10
56  2010-04-30     31.36
57  2010-01-31      0.00
58  2009-10-31      0.00


In [34]:
# Convert Date to datetime format
pe_df['Date'] = pd.to_datetime(pe_df['Date'])

# Print to check
print(pe_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      59 non-null     datetime64[ns]
 1   PE Ratio  59 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 1.1 KB
None


In [35]:
# Merge PE ratio into new_df
new_df = pd.merge(new_df, pe_df, on='Date', how='left')

# Print to check
print(new_df.info())
print(new_df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

### VOLUME (YAHOO FINANCE)

In [36]:
# Create new DataFrame with only volume
vol_df = df['Volume']

# Reset index of vol_df
vol_df.reset_index(inplace=True)

# Print to check 
print(vol_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       1296 non-null   datetime64[ns]
 1   000660.KS  1230 non-null   float64       
 2   005930.KS  1230 non-null   float64       
 3   AMD        1257 non-null   float64       
 4   AMZN       1257 non-null   float64       
 5   ASML       1257 non-null   float64       
 6   GOOG       1257 non-null   float64       
 7   INTC       1257 non-null   float64       
 8   MSFT       1257 non-null   float64       
 9   MU         1257 non-null   float64       
 10  NVDA       1257 non-null   float64       
 11  TSM        1257 non-null   float64       
 12  ^GSPC      1257 non-null   float64       
 13  ^IXIC      1257 non-null   float64       
 14  ^SOX       1257 non-null   float64       
dtypes: datetime64[ns](1), float64(14)
memory usage: 152.0 KB
None


In [37]:
# Create new DataFrame with only NVDA volume
nvol_df = vol_df[['Date','NVDA']]

# Rename NVDA column
nvol_df.rename(columns={'NVDA':'Volume'}, inplace=True)

# Print to check
print(nvol_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    1296 non-null   datetime64[ns]
 1   Volume  1257 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 20.4 KB
None


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nvol_df.rename(columns={'NVDA':'Volume'}, inplace=True)


In [38]:
# Merge Volume column into new_df
new_df = pd.merge(new_df, nvol_df, on='Date', how='left')

# Print to check
print(new_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 37 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

### FED FUNDS RATE (ALPHA VANTAGE)

In [39]:
# Set url to retrieve data
url_ffr = 'https://www.alphavantage.co/query?function=FEDERAL_FUNDS_RATE&interval=daily&apikey=JI8F8CG68TLZIDYU'

# Retrieve data
r = requests.get(url_ffr)
ffr_data = r.json()

In [40]:
# Check keys
print(ffr_data.keys())

dict_keys(['name', 'interval', 'unit', 'data'])


In [41]:
# Convert json into a DataFrame
ffr_df = pd.DataFrame(ffr_data['data'])

# Print to check
print(ffr_df)
print(ffr_df.info())

             date  value
0      2024-05-09   5.33
1      2024-05-08   5.33
2      2024-05-07   5.33
3      2024-05-06   5.33
4      2024-05-05   5.33
...           ...    ...
25511  1954-07-05   0.88
25512  1954-07-04  1.250
25513  1954-07-03  1.250
25514  1954-07-02  1.250
25515  1954-07-01   1.13

[25516 rows x 2 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25516 entries, 0 to 25515
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    25516 non-null  object
 1   value   25516 non-null  object
dtypes: object(2)
memory usage: 398.8+ KB
None


In [42]:
# Convert Date to datetime format
ffr_df['date'] = pd.to_datetime(ffr_df['date'])

# Rename columns
ffr_df.rename(columns={'date':'Date','value':'Interest Rate'},inplace=True)

# Print to check
print(ffr_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25516 entries, 0 to 25515
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           25516 non-null  datetime64[ns]
 1   Interest Rate  25516 non-null  object        
dtypes: datetime64[ns](1), object(1)
memory usage: 398.8+ KB
None


In [43]:
# Create mask to identify rows with dates earlier than cutoff_date
mask = ffr_df['Date'] < start_date

# Use mask to filter out rows
ffr_df = ffr_df[~mask]

# Print to check
print(ffr_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 1827 entries, 0 to 1826
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1827 non-null   datetime64[ns]
 1   Interest Rate  1827 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 42.8+ KB
None


In [44]:
# Merge Interest Rate column into new_df
new_df = pd.merge(new_df, ffr_df, on='Date', how='left')

# Print to check
print(new_df.info())
print(new_df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 38 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

### SIMPLE MOVING AVERAGE (ALPHA VANTAGE)

In [45]:
# Set url to retrieve data
url_sma50 = f'https://www.alphavantage.co/query?function=SMA&symbol=NVDA&interval=daily&time_period=50&series_type=open&apikey=JI8F8CG68TLZIDYU'

#Retrieve data
r = requests.get(url_sma50)
sma50_data = r.json()

In [46]:
# Check keys
print(sma50_data.keys())

dict_keys(['Meta Data', 'Technical Analysis: SMA'])


In [47]:
# Convert json into a DataFrame
sma50_df = pd.DataFrame(sma50_data['Technical Analysis: SMA'])

# Print to check
print(sma50_df)
print(sma50_df.info())

    2024-05-13 2024-05-10 2024-05-09 2024-05-08 2024-05-07 2024-05-06  \
SMA   880.1211   878.0248   875.7819   873.1994   871.1784   868.8980   

    2024-05-03 2024-05-02 2024-05-01 2024-04-30  ... 2000-01-25 2000-01-24  \
SMA   867.1773   864.6238   861.3345   858.7079  ...     0.7754     0.7699   

    2000-01-21 2000-01-20 2000-01-19 2000-01-18 2000-01-14 2000-01-13  \
SMA     0.7632     0.7570     0.7500     0.7439     0.7375     0.7311   

    2000-01-12 2000-01-11  
SMA     0.7248     0.7173  

[1 rows x 6123 columns]
<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, SMA to SMA
Columns: 6123 entries, 2024-05-13 to 2000-01-11
dtypes: object(6123)
memory usage: 47.8+ KB
None


In [48]:
# Melt sma50_df
sma50_df = pd.melt(sma50_df)

# Print to check
print(sma50_df)
print(sma50_df.info())

        variable     value
0     2024-05-13  880.1211
1     2024-05-10  878.0248
2     2024-05-09  875.7819
3     2024-05-08  873.1994
4     2024-05-07  871.1784
...          ...       ...
6118  2000-01-18    0.7439
6119  2000-01-14    0.7375
6120  2000-01-13    0.7311
6121  2000-01-12    0.7248
6122  2000-01-11    0.7173

[6123 rows x 2 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6123 entries, 0 to 6122
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   variable  6123 non-null   object
 1   value     6123 non-null   object
dtypes: object(2)
memory usage: 95.8+ KB
None


In [49]:
# Rename columns
sma50_df.rename(columns={'variable':'Date','value':'50D SMA'},inplace=True)

# Convert Date to datetime format
sma50_df['Date'] = pd.to_datetime(sma50_df['Date'])

# Print to check
print(sma50_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6123 entries, 0 to 6122
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     6123 non-null   datetime64[ns]
 1   50D SMA  6123 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 95.8+ KB
None


In [50]:
# Set url to retrieve data
url_sma200 = f'https://www.alphavantage.co/query?function=SMA&symbol=NVDA&interval=daily&time_period=200&series_type=open&apikey=JI8F8CG68TLZIDYU'

# Retrieve data
r = requests.get(url_sma200)
sma200_data = r.json()

In [51]:
# Convert json into a DataFrame
sma200_df = pd.DataFrame(sma200_data['Technical Analysis: SMA'])

# Print to check
print(sma200_df)
print(sma200_df.info())

    2024-05-13 2024-05-10 2024-05-09 2024-05-08 2024-05-07 2024-05-06  \
SMA   602.2055   600.0071   597.7924   595.5125   593.2744   591.0084   

    2024-05-03 2024-05-02 2024-05-01 2024-04-30  ... 2000-08-28 2000-08-25  \
SMA   588.8638   586.8470   584.9591   583.0192  ...     1.6549     1.6432   

    2000-08-24 2000-08-23 2000-08-22 2000-08-21 2000-08-18 2000-08-17  \
SMA     1.6314     1.6200     1.6093     1.5980     1.5854     1.5737   

    2000-08-16 2000-08-15  
SMA     1.5622     1.5500  

[1 rows x 5973 columns]
<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, SMA to SMA
Columns: 5973 entries, 2024-05-13 to 2000-08-15
dtypes: object(5973)
memory usage: 46.7+ KB
None


In [52]:
# Melt sma200_df
sma200_df = pd.melt(sma200_df)

# Print to check
print(sma200_df)
print(sma200_df.info())

        variable     value
0     2024-05-13  602.2055
1     2024-05-10  600.0071
2     2024-05-09  597.7924
3     2024-05-08  595.5125
4     2024-05-07  593.2744
...          ...       ...
5968  2000-08-21    1.5980
5969  2000-08-18    1.5854
5970  2000-08-17    1.5737
5971  2000-08-16    1.5622
5972  2000-08-15    1.5500

[5973 rows x 2 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5973 entries, 0 to 5972
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   variable  5973 non-null   object
 1   value     5973 non-null   object
dtypes: object(2)
memory usage: 93.5+ KB
None


In [53]:
# Rename columns
sma200_df.rename(columns={'variable':'Date','value':'200D SMA'},inplace=True)

# Convert Date to datetime format
sma200_df['Date'] = pd.to_datetime(sma200_df['Date'])

# Print to check
print(sma200_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5973 entries, 0 to 5972
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      5973 non-null   datetime64[ns]
 1   200D SMA  5973 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 93.5+ KB
None


In [54]:
# Merge 50d SMA and 200d SMA columns into new_df
new_df = pd.merge(new_df, sma50_df, on='Date', how='left')
new_df = pd.merge(new_df, sma200_df, on='Date', how='left')

# Print to check
print(new_df.info())
print(new_df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 40 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1296 non-null   datetime64[ns]
 1   000660.KS           1296 non-null   float64       
 2   005930.KS           1296 non-null   float64       
 3   AMD                 1296 non-null   float64       
 4   AMZN                1296 non-null   float64       
 5   ASML                1296 non-null   float64       
 6   GOOG                1296 non-null   float64       
 7   INTC                1296 non-null   float64       
 8   MSFT                1296 non-null   float64       
 9   MU                  1296 non-null   float64       
 10  NVDA                1296 non-null   float64       
 11  TSM                 1296 non-null   float64       
 12  ^GSPC               1296 non-null   float64       
 13  ^IXIC               1296 non-null   float64     

In [55]:
# Rename columns
column_names = {'Action': 'Analyst Action',
                'Trans Total': 'Insider Sales',
                'surprise': 'Surprise',
                'surprisePercentage': 'Surprise Perc',
                'total_short':'Short',
                'perc_short': 'Short Perc'}
new_df.rename(columns=column_names, inplace=True)

# Print to check
print(new_df.info())
print(new_df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296 entries, 0 to 1295
Data columns (total 40 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            1296 non-null   datetime64[ns]
 1   000660.KS       1296 non-null   float64       
 2   005930.KS       1296 non-null   float64       
 3   AMD             1296 non-null   float64       
 4   AMZN            1296 non-null   float64       
 5   ASML            1296 non-null   float64       
 6   GOOG            1296 non-null   float64       
 7   INTC            1296 non-null   float64       
 8   MSFT            1296 non-null   float64       
 9   MU              1296 non-null   float64       
 10  NVDA            1296 non-null   float64       
 11  TSM             1296 non-null   float64       
 12  ^GSPC           1296 non-null   float64       
 13  ^IXIC           1296 non-null   float64       
 14  ^SOX            1296 non-null   float64       
 15  0006

In [56]:
# Export new_df to csv
new_df.to_csv('compiled.csv')