<a href="https://colab.research.google.com/github/suleymanacikgoz/Global_AI_Hub_Project/blob/main/globalaihubproje.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

In [38]:
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

yf.pdr_override()

# For time stamps
from datetime import datetime


# The tech stocks we'll use for this analysis
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

# Set up End and Start times for data grab: the window is the last year up to now.
end = datetime.now()
# Subtract one calendar year with DateOffset instead of rebuilding the date by
# hand: datetime(end.year - 1, end.month, end.day) raises ValueError when the
# notebook is run on Feb 29. DateOffset clamps to Feb 28 in that case.
# Starting from end.date() keeps `start` at midnight, as before.
start = (pd.Timestamp(end.date()) - pd.DateOffset(years=1)).to_pydatetime()

# Download each ticker into an explicit mapping rather than writing into
# globals(), so it is visible where the per-ticker frames come from.
stock_frames = {}
for stock in tech_list:
    stock_frames[stock] = yf.download(stock, start, end)

# Later cells refer to the frames by ticker name, so bind them explicitly
# (same order as tech_list).
AAPL, GOOG, MSFT, AMZN = (stock_frames[ticker] for ticker in tech_list)

company_list = [AAPL, GOOG, MSFT, AMZN]
company_name = ["APPLE", "GOOGLE", "MICROSOFT", "AMAZON"]

# Tag every row with its company so the frames can be told apart after concat.
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

# Stack the four per-company frames row-wise into one long frame.
df = pd.concat(company_list, axis=0)
df.tail(10)



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,company_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-05-02,180.850006,185.100006,179.910004,184.720001,184.720001,54303500,AMAZON
2024-05-03,186.990005,187.869995,185.419998,186.210007,186.210007,39172000,AMAZON
2024-05-06,186.279999,188.75,184.800003,188.699997,188.699997,34725300,AMAZON
2024-05-07,188.919998,189.940002,187.309998,188.759995,188.759995,34048900,AMAZON
2024-05-08,187.440002,188.429993,186.389999,188.0,188.0,26136400,AMAZON
2024-05-09,188.880005,191.699997,187.440002,189.5,189.5,43368400,AMAZON
2024-05-10,189.160004,189.889999,186.929993,187.479996,187.479996,34141800,AMAZON
2024-05-13,188.0,188.309998,185.360001,186.570007,186.570007,24898600,AMAZON
2024-05-14,183.820007,187.720001,183.449997,187.070007,187.070007,38652200,AMAZON
2024-05-15,185.770004,186.719299,182.729996,183.639999,183.639999,25002746,AMAZON


In [39]:
def check_df(dataframe, head=5):
    """Print a quick structural overview of a DataFrame.

    Prints, in order: shape, column dtypes, the first and last ``head`` rows,
    the per-column null counts, and a transposed table of selected quantiles.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to inspect.
    head : int, default 5
        Number of rows shown in both the head and the tail sections.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print("##################### Shape #####################")
    print(dataframe.shape)
    print("##################### Types #####################")
    print(dataframe.dtypes)
    print("##################### Head #####################")
    print(dataframe.head(head))
    print("##################### Tail #####################")
    print(dataframe.tail(head))
    print("##################### NA #####################")
    print(dataframe.isnull().sum())
    print("##################### Quantiles #####################")
    # numeric_only=True: since pandas 2.0 quantile() no longer drops
    # non-numeric columns by default and raises TypeError on object columns.
    print(dataframe.quantile([0, 0.05, 0.50, 0.95, 0.99, 1], numeric_only=True).T)

def cat_summary(dataframe, col_name, plot=False):
    """Print the value counts and percentage share of one column.

    The printed table has one row per distinct value of ``col_name`` with the
    count (column named ``col_name``) and its share of all rows in percent
    (column ``"Ratio"``). A separator line follows. When ``plot`` is true, a
    seaborn count plot of the column is shown as well.
    """
    counts = dataframe[col_name].value_counts()
    summary = pd.DataFrame({col_name: counts,
                            "Ratio": 100 * counts / len(dataframe)})
    print(summary)
    print("##########################################")

    if not plot:
        return

    sns.countplot(x=dataframe[col_name], data=dataframe)
    plt.show(block=True)

def num_summary(dataframe, numerical_col, plot=False):
    """Print describe() statistics for a numeric column at fine-grained percentiles.

    The percentiles reported are 5%, every decile from 10% to 90%, 95% and 99%.
    When ``plot`` is true, a 20-bin histogram of the column is shown, titled
    and x-labelled with ``numerical_col``.
    """
    percentiles = [
        0.05,
        0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
        0.95, 0.99,
    ]
    selected = dataframe[numerical_col]
    print(selected.describe(percentiles).T)

    if not plot:
        return

    selected.hist(bins=20)
    plt.xlabel(numerical_col)
    plt.title(numerical_col)
    plt.show(block=True)

def target_summary_with_num(dataframe, target, numerical_col):
    """Print the mean of ``numerical_col`` within each group of ``target``.

    The table is followed by three newlines.
    """
    group_means = dataframe.groupby(target).agg({numerical_col: "mean"})
    print(group_means, end="\n\n\n")

def target_summary_with_cat(dataframe, target, categorical_col):
    """Print the mean of ``target`` for each value of ``categorical_col``.

    The result is shown as a one-column table named ``"TARGET_MEAN"``,
    followed by three newlines.
    """
    mean_by_category = dataframe.groupby(categorical_col)[target].mean()
    summary = pd.DataFrame({"TARGET_MEAN": mean_by_category})
    print(summary, end="\n\n\n")

def correlation_matrix(df, cols):
    """Show an annotated heatmap of the pairwise correlations of ``df[cols]``.

    The current matplotlib figure is resized to 10x8 inches, the tick font
    size is set to 10, and the correlation matrix is drawn with seaborn using
    the ``RdBu`` colour map before the figure is shown.
    """
    figure = plt.gcf()
    figure.set_size_inches(10, 8)
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize=10)

    sns.heatmap(
        df[cols].corr(),
        annot=True,
        linewidths=0.5,
        annot_kws={'size': 12},
        linecolor='w',
        cmap='RdBu',
    )
    plt.show(block=True)


def grab_col_names(dataframe, cat_th=13, car_th=20):
    """Classify the columns of a DataFrame by dtype and number of distinct values.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are classified.
    cat_th : int, default 13
        A non-object column with fewer than ``cat_th`` distinct values is
        treated as categorical ("numeric but categorical").
    car_th : int, default 20
        An object column with more than ``car_th`` distinct values is treated
        as high-cardinality ("categorical but cardinal").

    Returns
    -------
    cat_cols : list
        Object columns that are not high-cardinality, followed by the
        numeric-but-categorical columns.
    num_cols : list
        Non-object columns that are not numeric-but-categorical.
    cat_but_car : list
        High-cardinality object columns.

    A short summary of the counts is also printed.
    """
    object_cols = []
    num_but_cat = []
    cat_but_car = []
    num_cols = []

    # Single pass: every column lands in exactly one dtype branch.
    for col in dataframe.columns:
        distinct = dataframe[col].nunique()
        if dataframe[col].dtypes == "O":
            object_cols.append(col)
            if distinct > car_th:
                cat_but_car.append(col)
        elif distinct < cat_th:
            num_but_cat.append(col)
        else:
            num_cols.append(col)

    # Categorical = object columns plus low-cardinality numerics, minus the
    # high-cardinality object columns.
    cat_cols = [col for col in object_cols + num_but_cat if col not in cat_but_car]

    n_rows, n_cols = dataframe.shape[0], dataframe.shape[1]
    print(f"Observations: {n_rows}")
    print(f"Variables: {n_cols}")
    print(f'cat_cols: {len(cat_cols)}')
    print(f'num_cols: {len(num_cols)}')
    print(f'cat_but_car: {len(cat_but_car)}')
    print(f'num_but_cat: {len(num_but_cat)}')

    return cat_cols, num_cols, cat_but_car


In [41]:
# Classify df's columns with the default thresholds; the three returned lists
# are the categorical, numerical and high-cardinality categorical column names.
cat_cols, num_cols, cat_but_car=grab_col_names(df)

Observations: 1012
Variables: 7
cat_cols: 1
num_cols: 6
cat_but_car: 0
num_but_cat: 0


In [40]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns of
# the AAPL frame; shown as the cell's output.
AAPL.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,253.0,253.0,253.0,253.0,253.0,253.0
mean,181.607984,183.075593,180.231107,181.69853,181.107149,58220810.0
std,8.514838,8.346882,8.4857,8.463058,8.387803,18832690.0
min,165.350006,166.399994,164.080002,165.0,164.776505,16827600.0
25%,174.199997,176.100006,173.100006,174.210007,173.523712,46792900.0
50%,181.5,182.929993,180.0,181.559998,181.103729,53665600.0
75%,189.259995,189.990005,187.610001,189.369995,188.713638,64885400.0
max,198.020004,199.619995,197.0,198.110001,197.589523,163224100.0


In [44]:
# Print the describe() summary of each company's frame, in company_list order.
for company in company_list:
  print(company.describe())

             Open        High         Low       Close   Adj Close  \
count  253.000000  253.000000  253.000000  253.000000  253.000000   
mean   181.607984  183.075593  180.231107  181.698530  181.107149   
std      8.514838    8.346882    8.485700    8.463058    8.387803   
min    165.350006  166.399994  164.080002  165.000000  164.776505   
25%    174.199997  176.100006  173.100006  174.210007  173.523712   
50%    181.500000  182.929993  180.000000  181.559998  181.103729   
75%    189.259995  189.990005  187.610001  189.369995  188.713638   
max    198.020004  199.619995  197.000000  198.110001  197.589523   

             Volume  
count  2.530000e+02  
mean   5.822081e+07  
std    1.883269e+07  
min    1.682760e+07  
25%    4.679290e+07  
50%    5.366560e+07  
75%    6.488540e+07  
max    1.632241e+08  
             Open        High         Low       Close   Adj Close  \
count  253.000000  253.000000  253.000000  253.000000  253.000000   
mean   138.024747  139.533541  136.819059 

In [64]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Closing-price history: one subplot per ticker on a 2x2 grid.
fig = make_subplots(rows=2, cols=2, subplot_titles=[f"{tech} için Kapanış Fiyatı" for tech in tech_list])

# Pair each frame with its ticker; count from 1 because subplot rows/cols are 1-based.
for i, (company, tech) in enumerate(zip(company_list, tech_list), 1):
    row = (i - 1) // 2 + 1
    col = (i - 1) % 2 + 1
    trace = go.Scatter(x=company.index, y=company['Adj Close'], mode='lines', name=f"{tech}")
    fig.add_trace(trace, row=row, col=col)

fig.update_layout(
    height=1000, width=1200,
    title="Historical View of the Closing Price",
    showlegend=True,
    title_font=dict(size=24),
)

# layout.xaxis / layout.yaxis address only the first subplot's axes, so the
# titles are set with update_xaxes / update_yaxes, which apply to every subplot.
fig.update_xaxes(title_text="Tarih")
fig.update_yaxes(title_text="Kapanış Fiyatı")

fig.show()


In [63]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Daily traded volume: one subplot per ticker on a 2x2 grid.
fig = make_subplots(rows=2, cols=2, subplot_titles=[f"{tech} için Günlük İşlem Hacmi" for tech in tech_list])

# Loop through each company and add its Volume data to the corresponding subplot
# (counting from 1 because subplot rows/cols are 1-based).
for i, (company, tech) in enumerate(zip(company_list, tech_list), 1):
    row = (i - 1) // 2 + 1
    col = (i - 1) % 2 + 1
    trace = go.Scatter(x=company.index, y=company['Volume'], mode='lines', name=f"{tech}")
    fig.add_trace(trace, row=row, col=col)

# Figure-level settings: size, main title and legend.
fig.update_layout(
    height=800, width=1000,  # You can adjust the size as needed
    title="Hisse Senetlerinin Günlük İşlem Hacmi",
    showlegend=True,
    title_font=dict(size=20),
)

# layout.xaxis / layout.yaxis address only the first subplot's axes, so the
# titles are set with update_xaxes / update_yaxes, which apply to every subplot.
fig.update_xaxes(title_text="Tarih")
fig.update_yaxes(title_text="Hacim")

# Show the plot
fig.show()
