In [None]:
!pip install tabulate

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns
from learntools.time_series.style import *

from sklearn.linear_model import LinearRegression
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Base names (without the ".csv" extension) of the dataset files to load.
ls = [
    'big_tech_stock_prices',
    'big_tech_companies',
]

def info(df):
    """Return a text table summarising each column of ``df``.

    One table row is produced per column, holding the column label, its dtype,
    the number of non-null values and the number of null values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    The value returned by ``tabulate`` for those rows, rendered with the
    "fancy_grid" table format.
    """
    summary_rows = [
        [
            column,
            df[column].dtype,
            df[column].notna().sum(),
            df[column].isna().sum(),
        ]
        for column in df.columns
    ]
    column_headers = ["Column", "Dtype", "Non-Null Count", "Null Count"]
    return tabulate(summary_rows, headers=column_headers, tablefmt="fancy_grid")


# Read each CSV named in `ls` into a dictionary keyed by that name.
dfs = {}
for name in ls:
    dfs[name] = pd.read_csv(f'/kaggle/input/big-tech-giants-stock-price-data/{name}.csv')

# For every loaded frame, print its name underlined with "=" and then the
# per-column summary table built by info().
for name, df in dfs.items():
    heading = f"\n{name}\n{'=' * len(name)}"
    print(heading)
    print(info(df))

In [None]:
# Work with the stock-price table from here on; preview its first five rows.
prices_key = 'big_tech_stock_prices'
df = dfs[prices_key]
df.head(5)

In [None]:
# Rebuild `df` from the loaded table on a copy, so that the frame stored in
# `dfs` is never mutated and this cell can be re-run safely (an in-place
# set_index would remove the 'date' column and make a second run fail).
df = dfs['big_tech_stock_prices'].copy()
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

# Map every distinct date to its chronological position (0, 1, 2, ...).
# Sorting makes the numbering independent of the order in which dates first
# appear in the rows.
unique_dates = df.index.unique().sort_values()
date_to_int = {date: idx for idx, date in enumerate(unique_dates)}

# Integer time step for each row, shared by all rows that fall on the same date.
df['time'] = df.index.map(date_to_int)
df.head()

In [None]:
# Keep only the rows for ticker CRM whose index date falls in calendar year 2010.
is_crm = df['stock_symbol'] == 'CRM'
in_2010 = df.index.year == 2010
df = df[is_crm & in_2010]
df

In [None]:
# Plotting
plt.style.use("seaborn-v0_8-whitegrid")
plt.rc(
    "figure",
    autolayout=True,
    figsize=(22, 8),
    titlesize=18,
    titleweight='bold',
)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)

%config InlineBackend.figure_format = 'retina'


fig, ax = plt.subplots()
sns.lineplot(x='time', y='adj_close', data=df, ax=ax, color='0.75')
sns.regplot(x='time', y='adj_close', data=df, ax=ax, ci=None, scatter_kws=dict(color='0.25'))


ax.set_title('Total Sales Over Time', fontsize=40)
ax.set_xlabel('Time', fontsize=32)
ax.set_ylabel('Total Sales', fontsize=32)

ax.tick_params(axis='both', labelsize=28)

plt.show()

In [None]:
# Smooth adj_close with a centered 7-row moving average. A value is produced
# wherever at least 4 observations fall inside the window, so the ends of the
# series are still covered.
adj_close = df['adj_close']
centered_window = adj_close.rolling(window=7, center=True, min_periods=4)
trend = centered_window.mean()


# Make a plot: the raw series faded in the background, the moving average on top.
# NOTE(review): plot_params is not defined in this notebook; presumably it is
# provided by the `learntools.time_series.style` star import — confirm.
ax = adj_close.plot(**plot_params, alpha=0.5)
ax = trend.plot(ax=ax, linewidth=3)

In [None]:
from statsmodels.tsa.deterministic import DeterministicProcess

forecast_steps = 30  # number of future time steps to generate features for

y = df['adj_close'].copy()  # the target

# order=2 requests linear and quadratic time-trend columns; no constant column
# is requested here.
dp = DeterministicProcess(index=df.index, order=2)

# Trend features for the observed dates.
X = dp.in_sample()

# Dates for the forecast horizon. Each training step is one row of `df`
# (presumably one trading day — confirm), so the horizon advances in business
# days rather than calendar days to keep one step equal to one row. Note that
# freq='B' skips weekends but not market holidays.
forecast_dates = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=forecast_steps,
    freq='B',
)

# df.index has no fixed frequency, so the process cannot extend it by itself;
# passing forecast_index labels the out-of-sample rows directly instead of
# overwriting the index afterwards.
X_fore = dp.out_of_sample(steps=forecast_steps, forecast_index=forecast_dates)

In [None]:
# Fit a linear regression of the target on the trend features.
model = LinearRegression()
model.fit(X, y)

# Fitted trend over the observed dates and its continuation over the forecast dates.
y_pred = pd.Series(model.predict(X), index=X.index)
y_fore = pd.Series(model.predict(X_fore), index=X_fore.index)

# `y` is the CRM adj_close series, so the title and y-label describe adjusted
# closing prices (the previous "Average Close" / "items sold" text did not
# match the data).
# NOTE(review): plot_params is not defined in this notebook; presumably it is
# provided by the `learntools.time_series.style` star import — confirm.
ax = y.plot(**plot_params, alpha=0.5, title="CRM Adjusted Close - Trend Forecast", ylabel="adjusted close")
ax = y_pred.plot(ax=ax, linewidth=3, label="Trend", color='C0')
ax = y_fore.plot(ax=ax, linewidth=3, label="Trend Forecast", color='C3')
ax.legend();