In [4]:
!pip install yfinance --upgrade

Defaulting to user installation because normal site-packages is not writeable


In [5]:
# Import Libraries

import pandas as pd
import yfinance as yf


In [10]:
# Download Nvidia data into a csv file

nvda = yf.Ticker("NVDA")
df = nvda.history(period = 'max')
df.to_csv("nvidia_stock_data.csv")

In [49]:
# Read csv file 

df = pd.read_csv('nvidia_stock_data.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1999-01-22 00:00:00-05:00,0.040116,0.044772,0.035579,0.037609,2714688000,0.0,0.0
1,1999-01-25 00:00:00-05:00,0.040594,0.042026,0.037609,0.041549,510480000,0.0,0.0
2,1999-01-26 00:00:00-05:00,0.042026,0.042862,0.037728,0.038325,343200000,0.0,0.0
3,1999-01-27 00:00:00-05:00,0.038444,0.0394,0.036295,0.038206,244368000,0.0,0.0
4,1999-01-28 00:00:00-05:00,0.038206,0.038444,0.037848,0.038086,227520000,0.0,0.0


In [39]:
# Check for duplicates 

df.duplicated().sum()

0

In [40]:
# Check for Null Values

df.isnull().sum()

Date            0
Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64

In [41]:
# Data types of each features

df.dtypes

Date             object
Open            float64
High            float64
Low             float64
Close           float64
Volume            int64
Dividends       float64
Stock Splits    float64
dtype: object

In [42]:
# Data description

df.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
count,6677.0,6677.0,6677.0,6677.0,6677.0,6677.0,6677.0
mean,11.004047,11.201458,10.788858,11.005861,592669200.0,3.1e-05,0.00322
std,29.123343,29.617955,28.560516,29.116589,429723600.0,0.000394,0.139642
min,0.031997,0.032594,0.030564,0.031281,19680000.0,0.0,0.0
25%,0.259952,0.267918,0.252158,0.260181,329788000.0,0.0,0.0
50%,0.442575,0.449245,0.435612,0.442422,494548000.0,0.0,0.0
75%,5.120161,5.209953,5.03788,5.148196,724920000.0,0.0,0.0
max,182.899994,183.880005,180.479996,181.184998,9230856000.0,0.01,10.0


In [52]:
# Change the Date format

df['Date']= pd.to_datetime(df['Date'],utc=True).dt.date

df.head()



Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1999-01-22,0.040116,0.044772,0.035579,0.037609,2714688000,0.0,0.0
1,1999-01-25,0.040594,0.042026,0.037609,0.041549,510480000,0.0,0.0
2,1999-01-26,0.042026,0.042862,0.037728,0.038325,343200000,0.0,0.0
3,1999-01-27,0.038444,0.0394,0.036295,0.038206,244368000,0.0,0.0
4,1999-01-28,0.038206,0.038444,0.037848,0.038086,227520000,0.0,0.0


In [58]:
# Dataframe with rows when dividends are paid

dividends = df[df['Dividends'] != 0]
print(dividends)

            Date        Open        High         Low       Close      Volume  \
3480  2012-11-20    0.267166    0.268319    0.262321    0.265089   336976000   
3545  2013-02-26    0.284358    0.288768    0.283429    0.287143   487504000   
3604  2013-05-21    0.346462    0.349495    0.342496    0.348095   357784000   
3667  2013-08-20    0.348678    0.352899    0.348209    0.350554   234192000   
3731  2013-11-19    0.368248    0.371548    0.363533    0.364004   206716000   
3796  2014-02-25    0.445219    0.447113    0.439061    0.443798   242184000   
3855  2014-05-20    0.439180    0.440370    0.431805    0.433946   256200000   
3918  2014-08-19    0.459284    0.465258    0.457612    0.462869   248260000   
3983  2014-11-19    0.482346    0.483785    0.475146    0.480186   240300000   
4047  2015-02-24    0.533828    0.537441    0.529010    0.537441   149356000   
4106  2015-05-19    0.513065    0.514759    0.508225    0.509193   253808000   
4169  2015-08-18    0.564316    0.568205

In [59]:
# Dataframe with rows when dividends are paid

stock_split = df[df['Stock Splits'] != 0]
print(stock_split)

            Date        Open        High         Low       Close      Volume  \
361   2000-06-27    0.314242    0.319496    0.263858    0.272216   604608000   
664   2001-09-10    0.295559    0.300106    0.283448    0.289447  1872360000   
1813  2006-04-07    0.465804    0.469930    0.457398    0.466568   682674000   
2171  2007-09-11    0.785586    0.798652    0.777333    0.792692   405424000   
5659  2021-07-20   18.690300   18.798071   18.125499   18.572550   434687000   
6386  2024-06-10  120.322106  123.051015  116.963442  121.741539   313434100   

      Dividends  Stock Splits  
361         0.0           2.0  
664         0.0           2.0  
1813        0.0           2.0  
2171        0.0           1.5  
5659        0.0           4.0  
6386        0.0          10.0  


In [62]:
# Drop the Dividends and Stock Splits columns

df.drop(columns=['Dividends','Stock Splits'], inplace = True)

df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1999-01-22,0.040116,0.044772,0.035579,0.037609,2714688000
1,1999-01-25,0.040594,0.042026,0.037609,0.041549,510480000
2,1999-01-26,0.042026,0.042862,0.037728,0.038325,343200000
3,1999-01-27,0.038444,0.0394,0.036295,0.038206,244368000
4,1999-01-28,0.038206,0.038444,0.037848,0.038086,227520000


In [64]:
# Calculate Simple Moving Average

df['SMA'] = df['Close'].rolling(window = 30).mean()
df.fillna(0, inplace = True)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,SMA
0,1999-01-22,0.040116,0.044772,0.035579,0.037609,2714688000,0.0
1,1999-01-25,0.040594,0.042026,0.037609,0.041549,510480000,0.0
2,1999-01-26,0.042026,0.042862,0.037728,0.038325,343200000,0.0
3,1999-01-27,0.038444,0.0394,0.036295,0.038206,244368000,0.0
4,1999-01-28,0.038206,0.038444,0.037848,0.038086,227520000,0.0


In [66]:
# Export dataframes to csv file

df.to_csv("nvidia_stock_preprocessed.csv")
dividends.to_csv("nvidia_dividends.csv")
stock_split.to_csv("nvidia_stock_splits.csv")