# Import library and API key

In [None]:
# Read the value stored under 'sectors_api' in Colab's userdata; the fetch
# functions below send it as the Authorization header.
from google.colab import userdata
api_key = userdata.get('sectors_api')

In [None]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
from datetime import timedelta
import datetime

import statsmodels.api as sm

# Function

In [None]:
# Create list of date function
def get_date_list(start_date, interval_days=90):
    """Build a list of dates from ``start_date`` up to now in fixed steps.

    Args:
        start_date: First date of the list, as a ``'YYYY-MM-DD'`` string.
        interval_days: Number of days between consecutive dates. Defaults
            to 90, the step this notebook has always used.

    Returns:
        A list of ``datetime.datetime`` objects: ``start_date``, then every
        ``interval_days`` days while still earlier than the current time,
        and finally the current time itself as the last element.

    Raises:
        ValueError: If ``interval_days`` is not positive (the loop would
            never terminate) or ``start_date`` does not match the format.
    """
    if interval_days <= 0:
        raise ValueError("interval_days must be a positive number of days")

    current = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.datetime.today()
    step = timedelta(days=interval_days)

    date_list = []
    while current < end_date:
        date_list.append(current)
        current += step

    # Always close the list with "now" so the last window reaches today.
    date_list.append(end_date)

    return date_list

## Correlation Analysis

In [None]:
# Sector's Market Cap data Fetching
def fetch_sectors_market_cap(sector_list):
    """Fetch the ``market_cap`` report section for every sector in the list.

    Args:
        sector_list: Iterable of sector names; each one is inserted into the
            ``/v1/sector/report/{sector}/`` URL.

    Returns:
        A long-format DataFrame with the columns ``measurement``, ``value``,
        ``year``, ``quarter`` and ``sector``, one block of rows per sector
        that was fetched successfully. An empty DataFrame is returned when
        nothing could be fetched.

    Sectors whose request does not return HTTP 200 are reported (the status
    code is printed) and skipped, so a failed request can no longer reuse
    the previous sector's payload under the wrong sector name.
    """
    frames = []

    for sector in sector_list:
        url = f"https://api.sectors.app/v1/sector/report/{sector}/?sections=market_cap"

        headers = {
            "Authorization": api_key
        }

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            # Handle error: report it and move on to the next sector.
            print(response.status_code)
            continue

        data = response.json()

        # Pull the "quarterly_market_cap" entry out of the "market_cap"
        # section; .iloc[0] is the explicit form of the positional [0].
        quarterly = pd.DataFrame(pd.DataFrame(data).T["quarterly_market_cap"]).T
        df_mcap = pd.json_normalize(quarterly["market_cap"].iloc[0]).T.reset_index()
        df_mcap.columns = ["measurement", "value"]

        # The flattened key is split on '.' into measurement / year / quarter.
        df_mcap[['measurement', 'year', 'quarter']] = df_mcap['measurement'].str.split('.', expand=True)
        df_mcap["sector"] = sector

        frames.append(df_mcap)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames)

In [None]:
# Data Cleansing
def sectors_market_cap_data_cleansing(df_sec):
    """Reshape the long sector table into one column per sector.

    Args:
        df_sec: DataFrame with the columns ``measurement``, ``year``,
            ``quarter``, ``sector`` and ``value``.

    Returns:
        A wide DataFrame keyed by ``measurement``/``year``/``quarter`` with
        one value column per sector, without the rows whose measurement is
        ``'current_ttm_mcap_pavg'``.
    """
    # Long table -> wide table (one column per sector)
    wide = df_sec.pivot(
        index=["measurement", "year", "quarter"],
        columns="sector",
        values="value",
    )
    wide = wide.reset_index()

    # Leave out the 'current_ttm_mcap_pavg' measurement rows
    is_ttm_average = wide["measurement"] == 'current_ttm_mcap_pavg'
    return wide[~is_ttm_average]

In [None]:
# Historical plot
def historical_mc_plot(df_sec):
    """Draw one market-cap line per sector over time.

    Args:
        df_sec: Long-format DataFrame with the columns ``measurement``,
            ``year``, ``quarter``, ``sector`` and ``value``. It is copied,
            so the caller's frame is left unchanged.

    Rows whose measurement is ``'current_ttm_mcap_pavg'`` are not plotted.
    The figure is shown with ``plt.show()``; nothing is returned.
    """
    plot_df = df_sec.copy()

    # Build a datetime from "<year>-<quarter>" for the x axis
    year_quarter = df_sec['year'].astype(str) + '-' + df_sec['quarter']
    plot_df['date'] = pd.to_datetime(year_quarter)
    plot_df = plot_df.rename(columns={"value": "mcap"})

    keep = plot_df.measurement != "current_ttm_mcap_pavg"
    plot_df = plot_df[keep].sort_values(["sector", "year", "quarter"])

    # One line per sector
    plt.figure(figsize=(12, 6))
    for sector in plot_df['sector'].unique():
        sector_rows = plot_df[plot_df['sector'] == sector]
        plt.plot(sector_rows['date'], sector_rows['mcap'], marker='o', label=sector)

    plt.xlabel('Quarter')
    plt.ylabel('Market Cap Value')
    plt.title('Market Cap Value of Sectors Over Quarters and Years')
    plt.legend()
    plt.show()

In [None]:
# Lag Correlation between Sectors
def lag_correlation_sectors(df_sec_cleaned, sectors=None, lag=2):
    """Plot the correlation of each sector with the lagged sectors.

    Args:
        df_sec_cleaned: Wide DataFrame with one column per sector, plus the
            ``measurement``, ``year`` and ``quarter`` key columns. The frame
            is copied first, so the caller's data is never modified.
        sectors: Sector columns to analyse. Defaults to the six sectors this
            notebook works with.
        lag: Number of rows each sector column is shifted by. Defaults to 2.

    The heatmap rows are the unshifted sectors; the columns are every other
    numeric column, i.e. the ``<sector>_shifted_<lag>`` columns. The figure
    is shown with ``plt.show()``; nothing is returned.
    """
    if sectors is None:
        sectors = ['banks', 'basic-materials', 'food-beverage',
                   'oil-gas-coal', 'telecommunication', 'utilities']
    sectors = list(sectors)

    # Copy before touching anything so the original frame is not overwritten.
    df_sec_shifted = df_sec_cleaned.copy()

    # Shift data
    for sector in sectors:
        df_sec_shifted[f'{sector}_shifted_{lag}'] = df_sec_shifted[sector].shift(lag)

    # Calculate correlation on the value columns only. 'date' is listed in
    # case the frame carries one; it is not needed for the correlation.
    key_columns = ['measurement', 'year', 'quarter', 'date']
    corr_matrix = df_sec_shifted.drop(columns=key_columns, errors='ignore').corr()

    # Rows: unshifted sectors. Columns: everything except the unshifted
    # sectors. Selecting by label works whether or not the index is named.
    sector_rows = corr_matrix.loc[corr_matrix.index.isin(sectors)]
    corr_matrix_filtered = sector_rows.drop(columns=sectors)

    # Plot the heatmap
    plt.figure(figsize=(30, 12))
    sns.heatmap(corr_matrix_filtered, annot=True, cmap=plt.cm.RdBu, vmin=-1, vmax=1)
    plt.title('Correlation Matrix')
    plt.show()

In [None]:
def fetch_top_companies_per_sector(date, stock_list):
    """Fetch daily data for every symbol, one request per date window.

    Args:
        date: Ordered list of ``datetime`` boundaries, e.g. the output of
            ``get_date_list``. Each consecutive pair forms one window.
        stock_list: Iterable of symbols inserted into the
            ``/v1/daily/{symbol}/`` URL.

    Returns:
        A DataFrame with the rows of all successful responses stacked in
        request order, or an empty DataFrame when nothing was fetched.

    Requests that do not return HTTP 200 are reported (the status code is
    printed) and skipped.
    """
    frames = []

    for symbol in stock_list:
        for j, (window_open, window_close) in enumerate(zip(date, date[1:])):
            # Every window after the first starts one day later, so the
            # boundary day is not requested twice.
            if j > 0:
                window_open = window_open + timedelta(days=1)

            start_date = window_open.strftime('%Y-%m-%d')
            end_date = window_close.strftime('%Y-%m-%d')

            # ISO dates compare correctly as strings; the one-day offset can
            # push the final window's start past its end, so skip that case.
            if start_date > end_date:
                continue

            url = f"https://api.sectors.app/v1/daily/{symbol}/?start={start_date}&end={end_date}"

            headers = {
                "Authorization": api_key
            }

            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, headers=headers, timeout=30)

            if response.status_code != 200:
                # Report the failure instead of dropping the window silently.
                print(response.status_code)
                continue

            frames.append(pd.DataFrame(response.json()))

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames)

In [None]:
def top_companies_data_clean_correlation(df_daily_hist):
    """Add calendar columns and a lagged close price to the daily data.

    Args:
        df_daily_hist: DataFrame with the columns ``date``, ``symbol`` and
            ``close``. It is modified in place.

    Returns:
        The same DataFrame object, with ``date`` converted to datetime and
        the new columns ``month``, ``year`` and ``close_shift_6_months``.
    """
    lag_rows = 180  # the shift counts rows within each symbol, not calendar days

    # Date type manipulation
    parsed_dates = pd.to_datetime(df_daily_hist["date"])
    df_daily_hist["date"] = parsed_dates
    df_daily_hist["month"] = parsed_dates.dt.month
    df_daily_hist["year"] = parsed_dates.dt.year

    # Lag the close price separately for every symbol
    close_by_symbol = df_daily_hist.groupby("symbol")["close"]
    df_daily_hist["close_shift_6_months"] = close_by_symbol.shift(lag_rows)

    return df_daily_hist

## Seasonality

In [None]:
def get_top_2_companies_per_sectors(sector_list):
    """Return the two largest companies by market cap for each sector.

    Args:
        sector_list: Iterable of sector names; each one is inserted into the
            ``/v1/sector/report/{sector}/`` URL.

    Returns:
        A DataFrame with a ``sub_sector`` column (the requested sector name)
        followed by the company fields from the response, holding at most
        two rows per sector. An empty DataFrame is returned when nothing
        could be fetched.

    Sectors whose request does not return HTTP 200 are reported (the status
    code is printed) and skipped, so a failed request can no longer reuse
    the previous sector's payload under the wrong sector name.
    """
    frames = []

    for sector in sector_list:
        url = f"https://api.sectors.app/v1/sector/report/{sector}/?sections=companies"

        headers = {
            "Authorization": api_key
        }

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            # Handle error: report it and move on to the next sector.
            print(response.status_code)
            continue

        data = response.json()

        # Walk down to companies -> top_companies -> top_mcap and flatten
        # that list into one row per company.
        top_companies = pd.DataFrame(pd.DataFrame(data).T["top_companies"]).T["companies"].iloc[0]
        top_mcap = pd.json_normalize(top_companies)["top_mcap"][0]
        df_company = pd.json_normalize(top_mcap)
        df_company['sub_sector'] = sector

        frames.append(df_company)

    if not frames:
        # Nothing fetched: grouping by 'sub_sector' below would raise KeyError.
        return pd.DataFrame()

    combined = pd.concat(frames)

    # Take the two largest companies per sub-sector. Depending on the pandas
    # version, the grouping column may or may not be part of the frames
    # passed to apply, so dropping it must tolerate its absence; the group
    # key is restored as a column by reset_index either way.
    top_two = (
        combined.groupby('sub_sector')
        .apply(lambda group: group.nlargest(2, 'market_cap'))
        .drop("sub_sector", axis=1, errors="ignore")
        .reset_index()
        .drop("level_1", axis=1)
    )

    return top_two

In [None]:
def seasonality_daily_price_cleansing(df_daily_hist, top_companies=None):
    """Turn long daily prices into one close-price column per company.

    Args:
        df_daily_hist: DataFrame with the columns ``date``, ``symbol`` and
            ``close``.
        top_companies: DataFrame with the columns ``sub_sector`` and
            ``symbol`` used to label each company. When omitted, the
            notebook-level ``df_top_2_comp`` is used, as before.

    Returns:
        A wide DataFrame with a ``date`` column and one
        ``<symbol>_<sub_sector>`` column of close prices per company.
        Columns with fewer non-null values than half the number of rows are
        dropped.
    """
    if top_companies is None:
        # Backward-compatible fallback to the notebook global.
        top_companies = df_top_2_comp

    # Merge daily data with the company table to get each symbol's sub-sector
    labelled = df_daily_hist.merge(top_companies[["sub_sector", "symbol"]], on="symbol")

    # Combine the symbol and sub-sector into a single column label
    labelled["symbol_sub_sec"] = labelled["symbol"] + "_" + labelled["sub_sector"]

    # Long data -> wide data
    wide = labelled.pivot(index=["date"], columns="symbol_sub_sec", values="close").reset_index()

    # Keep only the columns that are at least 50% populated
    min_non_null = wide.shape[0] * 0.5
    return wide.dropna(thresh=min_non_null, axis=1)

# Sectors Correlation Analysis

## Market Capitalization Correlation

In [None]:
# Read data from API
# NOTE: despite its name, stock_list holds sector names here, not stock tickers.
stock_list = ["banks","basic-materials","oil-gas-coal","utilities",'food-beverage','telecommunication']


In [None]:
# Read the sectors' market-cap data from a CSV file instead of calling the API
df_sec = pd.read_csv("/content/sectors_marketcap.csv")

In [None]:
# Show data


In [None]:
# Sectors Market Cap Data Cleansing and Processing


In [None]:
# Drop some variables and calculate correlation


# Plot the heatmap


## Lags Correlation

### Sectors to Sectors Lag Correlation

In [None]:
# Historical Sectors' Market Capitalization Line Plot


In [None]:
# Lag Correlation Between Sectors


### Take Companies From the Highest Correlation Sector

In [None]:
# Fetch Top Companies Daily data
# Every ticker carries the '.JK' suffix; the correlation cells below look the
# companies up by these exact symbols (e.g. "BYAN.JK").
stock_list = ['BYAN.JK', 'DSSA.JK', 'CUAN.JK', 'ADRO.JK', 'ADMR.JK','PGEO.JK','POWR.JK','KEEN.JK','ARKO.JK']

# Window boundaries from 2019-01-01 up to today
date = get_date_list("2019-01-01")



In [None]:
# Top Company Correlation Data Cleansing and Processing


##### 2023

In [None]:
# Correlation between companies, restricted to the 2023 rows
df_daily_hist_2023 = df_daily_hist[df_daily_hist.year==2023]

# One column per (price field, symbol) pair, indexed by date
df_daily_hist_2023 = df_daily_hist_2023.pivot(
    index="date",
    columns="symbol",
    values=["close","close_shift_6_months"],
)

# Correlate everything, then discard rows/columns that are entirely NaN
df_daily_hist_2023_corr = df_daily_hist_2023.corr()
df_daily_hist_2023_corr = df_daily_hist_2023_corr.dropna(how='all').dropna(how='all',axis=1)

# Rows: lagged close of four companies; columns: plain close of the other five
lagged_rows = [('close_shift_6_months', symbol) for symbol in ("ARKO.JK", "POWR.JK", "KEEN.JK", "PGEO.JK")]
close_cols = [('close', symbol) for symbol in ("ADRO.JK", "BYAN.JK", "DSSA.JK", "ADMR.JK", "CUAN.JK")]
df_daily_hist_2023_corr.loc[lagged_rows, close_cols]

##### 2024

In [None]:
# Calculate Correlation between Companies for year 2024


# Seasonality Analysis

## Get 2 Top Companies by Market Cap per Sectors

In [None]:
# Specify which sectors are to be analyzed
sector_list = ["banks","basic-materials","oil-gas-coal","utilities"]

# Fetch the top 2 companies in each sector


In [None]:
# Read the top-2-companies table from a CSV file.
# seasonality_daily_price_cleansing reads this global df_top_2_comp.
df_top_2_comp = pd.read_csv("/content/top_2_companies.csv")

In [None]:
# Show data (a bare expression on a cell's last line is rendered by the notebook)
df_top_2_comp

## Historical Close Price

In [None]:
# Fetch historical daily transaction data
# Window boundaries from 2019-01-01 up to today, in the steps get_date_list produces
date = get_date_list("2019-01-01")



In [None]:
# Read the stocks' daily data from a CSV file instead of calling the API
df_daily = pd.read_csv("/content/stocks_daily_data.csv")

In [None]:
# Seasonality Daily Data Cleansing & Processing


In [None]:
# Plot Historical Close Price of some Major Companies in Indonesia


## Seasonality Decompose

Every value in a time series can be decomposed into three components, combined in one of two ways:

y = Trend + Seasonality + Residual (Additive)

y = Trend x Seasonality x Residual (Multiplicative)

In [None]:
# Datetime data manipulation
# Work on a copy so that df_daily itself is left untouched
df_daily_seas = df_daily.copy()


In [None]:
# Seasonal Decomposition for One Stocks


In [None]:
# Extract Decomposition Value for each Stock
# Both dicts map a column name of df_daily_seas to the matching component.
seasonality_dict = {}
trend_dict = {}

for ts in df_daily_seas.columns:
    # Forward-fill gaps before decomposing. Series.ffill() is the supported
    # spelling of the deprecated fillna(method='ffill').
    decompositions = sm.tsa.seasonal_decompose(df_daily_seas[ts].ffill(),period=12)
    # Store the results back
    seasonality_dict[ts] = decompositions.seasonal
    trend_dict[ts] = decompositions.trend

In [None]:
# Seasonality Plot


In [None]:
# Trend Plot
