# Stock Price Prediction

In [36]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [23]:
# function to download and save the raw data
def get_data(ticker, start, end):
    """Download price history for ``ticker`` and save it as a raw CSV.

    Args:
        ticker: Symbol passed to ``yf.download``; also used as the folder
            name under ``../datasets``.
        start: Start of the requested date range, forwarded to ``yf.download``.
        end: End of the requested date range, forwarded to ``yf.download``.

    Returns:
        The DataFrame returned by ``yf.download``.

    Raises:
        ValueError: If the download comes back empty, so that an empty frame
            never overwrites a previously saved ``raw.csv``.
    """
    # start/end are passed by keyword and auto_adjust is spelled out: newer
    # yfinance releases changed the auto_adjust default and emit a warning when
    # it is left implicit. True matches the columns this notebook already
    # displays (adjusted prices, no separate "Adj Close").
    raw_data = yf.download(ticker, start=start, end=end, auto_adjust=True)
    if raw_data is None or raw_data.empty:
        raise ValueError(
            f"No data downloaded for {ticker!r} between {start} and {end}"
        )

    os.makedirs(f"../datasets/{ticker}", exist_ok=True)
    raw_data.to_csv(f"../datasets/{ticker}/raw.csv")
    return raw_data

In [24]:
LOOKBACK_YEARS = 10  # length of the history window requested below


def years_before(moment, years):
    """Return midnight on the calendar date ``years`` years before ``moment``.

    Feb 29 is mapped to Feb 28 when the target year is not a leap year;
    building that date directly would raise ValueError.
    """
    try:
        return datetime(moment.year - years, moment.month, moment.day)
    except ValueError:
        # Feb 29 is the only month/day pair that can become invalid when
        # nothing but the year changes.
        return datetime(moment.year - years, 2, 28)


ticker = "GOOG"
end = datetime.now()
start = years_before(end, LOOKBACK_YEARS)
print(start)

2015-09-08 00:00:00


In [25]:
# fetch the price history for the configured ticker and date window;
# get_data also writes it to ../datasets/<ticker>/raw.csv
raw_data = get_data(ticker=ticker, start=start, end=end)
raw_data.head(n=5)

  raw_data = yf.download(ticker, start, end)
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,GOOG,GOOG,GOOG,GOOG,GOOG
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-09-08,30.551397,30.63341,30.027511,30.443539,45590000
2015-09-09,30.454971,31.140895,30.299893,30.877461,34042000
2015-09-10,30.883919,31.02359,30.390851,30.473858,38106000
2015-09-11,31.103613,31.10411,30.68858,30.804391,27470000
2015-09-14,30.977863,31.108088,30.788489,31.100136,34046000


In [34]:
# quick clean up
def clean_up(data, symbol=None):
    """Flatten the two-level column header and save the result as clean.csv.

    Args:
        data: Frame as returned by ``get_data``. It is copied, not modified.
        symbol: Folder name under ``../datasets`` to write into. Defaults to
            the notebook-level ``ticker`` so that existing
            ``clean_up(raw_data)`` calls keep working.

    Returns:
        The cleaned copy, so callers can keep working with it directly
        instead of re-reading the CSV.
    """
    if symbol is None:
        symbol = ticker

    clean_data = data.copy()
    # keep only the first header level (Close, High, ...) and drop its label
    clean_data.columns = clean_data.columns.get_level_values(0)
    clean_data.columns.name = None

    # do not rely on get_data having created the folder already
    os.makedirs(f"../datasets/{symbol}", exist_ok=True)
    clean_data.to_csv(f"../datasets/{symbol}/clean.csv")
    return clean_data

# flatten raw_data's column header and write ../datasets/<ticker>/clean.csv
clean_up(raw_data)

In [35]:
# reload the cleaned prices from disk; Date is read back as an ordinary column
# of strings (object dtype), not as a datetime index
data = pd.read_csv(filepath_or_buffer=f"../datasets/{ticker}/clean.csv")
data.head(n=10)

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2015-09-08,30.551397,30.63341,30.027511,30.443539,45590000
1,2015-09-09,30.454971,31.140895,30.299893,30.877461,34042000
2,2015-09-10,30.883919,31.02359,30.390851,30.473858,38106000
3,2015-09-11,31.103613,31.10411,30.68858,30.804391,27470000
4,2015-09-14,30.977863,31.108088,30.788489,31.100136,34046000
5,2015-09-15,31.569345,31.746293,31.004701,31.149838,41688000
6,2015-09-16,31.611097,31.709015,31.429178,31.585749,25730000
7,2015-09-17,31.955055,32.352689,31.563382,31.701064,45494000
8,2015-09-18,31.276588,31.810913,31.165748,31.651362,102668000
9,2015-09-21,31.584257,31.636447,31.112065,31.532564,35770000


In [28]:
# (rows, columns) of the reloaded frame
data.shape

(2514, 6)

In [29]:
# missing-value count for each column
data.isna().sum()

Date      0
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64

In [30]:
# number of rows that exactly repeat an earlier row
data.duplicated().sum()

np.int64(0)

In [31]:
# column dtypes, non-null counts and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2514 entries, 0 to 2513
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2514 non-null   object 
 1   Close   2514 non-null   float64
 2   High    2514 non-null   float64
 3   Low     2514 non-null   float64
 4   Open    2514 non-null   float64
 5   Volume  2514 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 118.0+ KB


In [32]:
# most recent 10 rows, to check where the downloaded window ends
data.tail(n=10)

Unnamed: 0,Date,Close,High,Low,Open,Volume
2504,2025-08-22,206.720001,209.229996,202.130005,203.960007,25723700
2505,2025-08-25,209.160004,211.089996,205.949997,207.0,22788800
2506,2025-08-26,207.949997,208.460007,206.449997,208.369995,23051000
2507,2025-08-27,208.210007,209.570007,206.389999,206.460007,15249000
2508,2025-08-28,212.369995,212.899994,207.600006,207.839996,20915700
2509,2025-08-29,213.529999,215.339996,210.970001,211.270004,24682200
2510,2025-09-02,211.990005,212.369003,206.960007,208.990005,28900100
2511,2025-09-03,231.100006,231.800003,225.119995,226.479996,72250300
2512,2025-09-04,232.660004,232.770004,226.759995,230.154999,32196000
2513,2025-09-05,235.169998,236.130005,232.380005,232.660004,26106200


In [33]:
# summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
data.describe()

Unnamed: 0,Close,High,Low,Open,Volume
count,2514.0,2514.0,2514.0,2514.0,2514.0
mean,92.622927,93.58107,91.62155,92.55834,29574690.0
std,48.199321,48.734898,47.645229,48.160406,14174710.0
min,29.568737,30.07125,29.294866,29.687531,6809800.0
25%,52.098194,52.714777,51.382205,52.130502,20651250.0
50%,75.591671,77.126545,74.957325,75.754708,26144150.0
75%,133.399059,134.951337,132.067476,133.375689,34124500.0
max,235.169998,236.130005,232.380005,232.660004,133078000.0
