## Table of contents
- [Importing Libraries And Packages](#lib)
- [Importing the dataset](#data)
- [Checking the missing values](#miss)
- [Preprocessing Date and convert into Datetime format](#date)
- [Exploratory Data Analysis using pandas_profiling package](#profile)
- [Exploratory Data Analysis EDA](#EDA)
    - [Exploring the duration of the dataset](#duration)
    - [Take an overview of open and close during this duration](#ov_open_close)
    - [Take an overview of High and Low during this duration](#ov_high_low)
    - [Monthwise comparison between Stock open and close price](#month_open_close)
    - [Monthwise comparison between Stock high and low price](#month_high_low)
    - [Analyzing Stocks Through Each Year](#ana)
        - [2016](#2016)
        - [2017](#2017)
        - [2018](#2018)
        - [2019](#2019)
        - [2020](#2020)
        - [2021](#2021)
    - [Analyzing the difference in closing prices for each year](#differ)
        - [2016](#differ_2016)
        - [2017](#differ_2017)
        - [2018](#differ_2018)
        - [2019](#differ_2019)
        - [2020](#differ_2020)
        - [2021](#differ_2021)
- [Scenario: how much 1000 would be worth if invested at 2016-08-16](#senario)


<a name='lib'> </a>
# Importing Libraries And Packages

In [None]:
import pandas as pd
import pandas_profiling as pp
import matplotlib.pyplot as plt
import missingno as msno
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))


<a name='data' ></a>
# Importing The Dataset

In [None]:
# Load the TSLA price table from the Kaggle dataset and preview the first rows.
data=pd.read_csv('/kaggle/input/tesla-stock-data-20162021/TSLA.csv')
data.head()

<a name='miss' ></a>
# Checking The Missing Values


In [None]:
msno.bar(data)

<a name='date' ></a>
# Converting The Date Column From String To Datetime

In [None]:
# Parse the Date column into datetime values, then print the frame summary
# so the resulting dtype can be checked.
data['Date'] = pd.to_datetime(data['Date'])
data.info()

<a name='profile' ></a>
# EDA- pandas_profiling
Getting the Exploratory Data Analysis from the pandas_profiling package

In [None]:
pp.ProfileReport(data)

<a name='EDA'></a>
# EDA


<a name='duration'></a>
##### 1- Find the Duration

In [None]:
# Earliest and latest dates in the table, and the time span between them.
dates = data['Date']
start_date, end_date = dates.min(), dates.max()
duration = end_date - start_date
print('Start date :', start_date)
print('End date :', end_date)
print('Duration :', duration)

<a name='ov_open_close' ></a>
##### 2-Take an overview of open and close during this duration


In [None]:
# Use the dates as the index so rows can be selected by year label
# (e.g. data.loc['2016']) in the cells below.
# Guarded so that re-running this cell does not raise KeyError once 'Date'
# has already been moved out of the columns.
if 'Date' in data.columns:
    data.set_index('Date', inplace=True)
data.head()

In [None]:
# Open/Close prices plus their 10-row rolling mean and standard deviation.
# Take an explicit copy: adding columns to a bare column selection of `data`
# triggers pandas' SettingWithCopyWarning.
data_open_close = data[['Open', 'Close']].copy()
data_open_close['Average_open'] = data_open_close['Open'].rolling(window=10).mean()
data_open_close['Average_close'] = data_open_close['Close'].rolling(window=10).mean()
data_open_close['STD_open'] = data_open_close['Open'].rolling(window=10).std()
data_open_close['STD_close'] = data_open_close['Close'].rolling(window=10).std()

In [None]:
# Line chart of the Open/Close prices together with their rolling means and
# rolling standard deviations, drawn on a white background without grid lines.
open_close_series = [
    data_open_close.Open,
    data_open_close.Close,
    data_open_close.Average_open,
    data_open_close.Average_close,
    data_open_close.STD_open,
    data_open_close.STD_close,
]
fig = px.line(
    data_open_close,
    x=data_open_close.index,
    y=open_close_series,
    labels={'Date': 'Date', 'value': 'Price'},
    width=800,
    height=500,
)
fig.update_layout(
    title='Overview of Open and close prices during 2016 to 2021',
    font_size=15,
    legend_title_text='Legend',
    plot_bgcolor='white',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='ov_high_low'></a>
##### 3-Take an overview of High and Low during this duration

In [None]:
# High/Low prices plus their 10-row rolling mean and standard deviation.
# Take an explicit copy: adding columns to a bare column selection of `data`
# triggers pandas' SettingWithCopyWarning.
data_high_low = data[['High', 'Low']].copy()
data_high_low['Average_high'] = data_high_low['High'].rolling(window=10).mean()
data_high_low['Average_low'] = data_high_low['Low'].rolling(window=10).mean()
data_high_low['STD_low'] = data_high_low['Low'].rolling(window=10).std()
data_high_low['STD_high'] = data_high_low['High'].rolling(window=10).std()
data_high_low.head()


In [None]:
# Line chart of the High/Low prices together with their rolling means and
# rolling standard deviations, drawn on a white background without grid lines.
high_low_series = [
    data_high_low.High,
    data_high_low.Low,
    data_high_low.Average_high,
    data_high_low.Average_low,
    data_high_low.STD_high,
    data_high_low.STD_low,
]
fig = px.line(
    data_high_low,
    x=data_high_low.index,
    y=high_low_series,
    labels={'Date': 'Date', 'value': 'Price'},
    width=800,
    height=500,
)
fig.update_layout(
    title='Overview of High and Low prices during 2016 to 2021',
    font_size=15,
    legend_title_text='Legend',
    plot_bgcolor='white',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='month_open_close'></a>
##### 4-Monthwise comparison between Stock open and close price

In [None]:
# Mean Open/Close price per calendar month, pooled over every year in the data.
# month_name() yields English month names by default, whereas strftime('%B')
# follows the process locale and could fail to match the English labels used
# for the reindex below (which would leave every row NaN).
monthwise = data.groupby(data.index.month_name())[['Open', 'Close']].mean()
month = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
         'September', 'October', 'November', 'December']
# groupby orders the month names alphabetically; put them back in calendar order.
monthwise = monthwise.reindex(month)
monthwise

In [None]:
# Grouped bar chart of the per-month mean Open and Close prices.
fig = px.bar(monthwise)
fig.update_layout(
    barmode='group',
    title='Monthwise comparision between Stock open and close price',
    font_size=15,
    xaxis_title='Months',
    yaxis_title='Price',
    plot_bgcolor='white',
)
fig.show()

<a name='month_high_low'></a>
##### 5-Monthwise comparison between Stock high and low price

In [None]:
# Lowest Low and highest High per calendar month, pooled over every year.
# month_name() yields English month names by default, whereas strftime('%B')
# follows the process locale and could fail to match the English labels used
# for the reindex below.
month_names = data.index.month_name()
month = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
         'September', 'October', 'November', 'December']
# Reindex to restore calendar order (groupby sorts the names alphabetically).
monthwise1 = data.groupby(month_names)['Low'].min().reindex(month).to_frame()
monthwise2 = data.groupby(month_names)['High'].max().reindex(month).to_frame()
# Combine both extremes in one frame: columns High, then Low.
monthwise2['Low'] = monthwise1['Low']
monthwise2

In [None]:
# Grouped bar chart of the per-month highest High and lowest Low.
fig = px.bar(monthwise2)
fig.update_layout(
    barmode='group',
    # This chart shows High/Low; the title previously read "open and close",
    # carried over from the chart of monthly open/close means.
    title='Monthwise comparision between Stock high and low price',
    font_size=15,
    xaxis_title='Months',
    yaxis_title='Price',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout({'plot_bgcolor': 'white'})
fig.show()

<a name='ana'></a>
### Analyzing Stocks Through Each Year

In [None]:
def plot(year,comp1,comp2):
    """Show a line chart of two columns of ``data`` for one year.

    year         -- label handed to ``data.loc`` to select the rows (e.g. '2016').
    comp1, comp2 -- names of the two columns drawn as separate lines.

    Returns the result of ``fig.show()``.
    """
    yearly = data.loc[year]
    fig = px.line(yearly, x=yearly.index, y=[comp1, comp2])
    fig.update_layout(
        title=f'Analyzing the stocks in {year}',
        xaxis_title='Month',
        yaxis_title='Prices',
        plot_bgcolor='white',
    )
    # Hide the grid lines on both axes.
    for update_axis in (fig.update_xaxes, fig.update_yaxes):
        update_axis(showgrid=False)
    return fig.show()

    
    

In [None]:
def duration(year):
    """Print the first and last index values of ``data`` for ``year`` and their difference.

    year -- label handed to ``data.loc`` to select the rows (e.g. '2016').
    """
    year_index = data.loc[year].index
    start, end = year_index.min(), year_index.max()
    print('Start =', start)
    print('End =', end)
    print('duration ', end - start)


In [None]:
def ECDF(data):
    """Return the points of the empirical cumulative distribution of ``data``.

    data -- a one-dimensional sequence or array of values.

    Returns ``(x, y)``: ``x`` is ``data`` sorted ascending and ``y`` holds the
    matching cumulative fractions 1/n, 2/n, ..., 1, where n is ``len(data)``.
    """
    sorted_values = np.sort(data)
    count = len(data)
    cumulative_fraction = np.arange(1, count + 1) / count
    return sorted_values, cumulative_fraction

<a name='2016'></a>
# 2016

In [None]:
data.loc['2016'].count()

In [None]:
duration('2016')

In [None]:
plot('2016','Open','Close')

In [None]:
plot('2016','High','Low')

In [None]:
# Empirical CDF of the 2016 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2016'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='2017'></a>
# 2017

In [None]:
data.loc['2017'].count()

In [None]:
duration('2017')

In [None]:
plot('2017','Open','Close')

In [None]:
plot('2017','High','Low')

In [None]:
# Empirical CDF of the 2017 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2017'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='2018'></a>
# 2018

In [None]:
data.loc['2018'].count()

In [None]:
duration('2018')

In [None]:
plot('2018','Open','Close')

In [None]:
plot('2018','High','Low')

In [None]:
# Empirical CDF of the 2018 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2018'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='2019'></a>
# 2019

In [None]:
data.loc['2019'].count()

In [None]:
duration('2019')

In [None]:
plot('2019','Open','Close')

In [None]:
plot('2019','High','Low')

In [None]:
# Empirical CDF of the 2019 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2019'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='2020'></a>

# 2020

In [None]:
data.loc['2020'].count()

In [None]:
duration('2020')

In [None]:
plot('2020','Open','Close')

In [None]:
plot('2020','High','Low')

In [None]:
# Empirical CDF of the 2020 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2020'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='2021'></a>
# 2021


In [None]:
data.loc['2021'].count()

In [None]:
duration('2021')

In [None]:
plot('2021','Open','Close')

In [None]:
# High vs. Low for 2021 (this cell sits in the 2021 section but used to plot 2020).
plot('2021','High','Low')

In [None]:
# Empirical CDF of the 2021 closing prices, shown as a scatter plot.
X_ecdf, y_ecdf = ECDF(data.loc['2021'].Close.values)
fig = px.scatter(x=X_ecdf, y=y_ecdf, title='ECDF Plot for the Closing price')
fig.update_layout(xaxis_title='Closing price', yaxis_title='ECDF')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name='differ'></a>
## Analyzing the difference in closing prices for each year

In [None]:
# Row-to-row change of the closing price; the first row has no predecessor,
# so its value is NaN.
data['diff_Close'] = data['Close'].diff()
data.head()


In [None]:
def diff(year):
    """Show a line chart of ``diff_Close`` for the rows of ``data`` selected by ``year``.

    year -- label handed to ``data.loc`` to select the rows (e.g. '2016').

    Returns the result of ``fig.show()``.
    """
    yearly = data.loc[year]
    fig = px.line(yearly, x=yearly.index, y=yearly.diff_Close)
    fig.update_layout(
        title=f'The diffirence of closing prices of {year}',
        yaxis_title='Difference',
        xaxis_title='Month',
    )
    fig.update_xaxes(showgrid=False)
    return fig.show()

<a name='differ_2016'></a>
# 2016

In [None]:
diff('2016')

<a name='differ_2017'></a>
# 2017

In [None]:
diff('2017')

<a name='differ_2018'></a>
# 2018

In [None]:
diff('2018')

<a name='differ_2019'></a>
# 2019

In [None]:
diff('2019')

<a name='differ_2020'></a>
# 2020

In [None]:
diff('2020')

<a name='differ_2021'></a>
# 2021

In [None]:
diff('2021')

In [None]:
#

# Scenario: how much 1000 would be worth if invested at 2016-08-16
<a name='senario'></a>

In [None]:
# Fractional change of the closing price from each row to the next.
Return = data['Close'].pct_change()
Return

In [None]:
# Turn each return into a growth factor (1 + r) and compound them over time.
Return_plus_one = Return.add(1)
# pct_change() has no previous row for the first date, so that entry is NaN.
# On the purchase date nothing has changed yet, so its growth factor is 1;
# without this the investment would have no value on the first date.
Return_plus_one.iloc[0] = 1.0
cumulative_Retuen = Return_plus_one.cumprod()
cumulative_Retuen

In [None]:
# Scale the compounded growth factor by the initial stake of 1000.
investment = cumulative_Retuen * 1000
investment

In [None]:
# Line chart of the value of the 1000 stake over time.
fig = px.line(investment, title='how much 1000$ would be worth if invested at 2016-08-16')
fig.update_layout(xaxis_title='Date', yaxis_title='the Return')
fig.update_layout({'plot_bgcolor': 'white'})
# Hide the grid on both axes (the x-axis call used to be issued twice and the
# y-axis grid was left visible).
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()