# 1. Overview

# 2. Data Understanding

## 2.1. Data Description

## 2.2. Import Necessary Libraries

In [1]:
import pandas as pd

## 2.3. Define global variables

In [2]:
# Semicolon-separated CSV holding ASML's weekly closing prices (read in section 2.5.1)
input_data_asml = 'data/weekly_data_asml.csv'
# Semicolon-separated CSV holding the suppliers' weekly closing prices (read in section 2.5.1)
input_data_suppliers = 'data/weekly_data_suppliers.csv'
# Destination of the combined dataset written in the Export section
output_data = 'data/weekly_data_combination.csv'

## 2.4. Functions

## 2.5. Code

### 2.5.1. Looking at the datasets

**ASML**

In [3]:
# Load the ASML price file and shape it in one pipeline:
#   1. parse 'Date' into datetimes,
#   2. promote it to the index (yielding a DatetimeIndex),
#   3. conform that index to an explicit weekly ('W') frequency,
#   4. rename 'Close' so it stays identifiable next to other close columns.
weekly_data_asml = (
    pd.read_csv(input_data_asml, sep=';')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date']))
    .set_index('Date')
    .asfreq('W')
    .rename(columns={'Close': 'Close_ASML'})
)

weekly_data_asml

Unnamed: 0_level_0,Close_ASML
Date,Unnamed: 1_level_1
2013-01-13,56.546032
2013-01-20,57.203339
2013-01-27,62.106476
2013-02-03,66.858612
2013-02-10,67.498146
...,...
2024-10-13,832.038757
2024-10-20,721.585144
2024-10-27,710.051941
2024-11-03,683.830017


**Suppliers**

In [4]:
# Load the suppliers price file and shape it in one pipeline:
#   1. parse 'Date' into datetimes,
#   2. promote it to the index (yielding a DatetimeIndex),
#   3. conform that index to an explicit weekly ('W') frequency.
weekly_data_suppliers = (
    pd.read_csv(input_data_suppliers, sep=';')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date']))
    .set_index('Date')
    .asfreq('W')
)

weekly_data_suppliers

Unnamed: 0_level_0,Close_Carl Zeiss Meditec,Close_Lam Research Corporation,Close_Tokyo Electron Limited
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-06,25.741583,1.532497,7.766667
2013-01-13,26.057859,1.512370,7.480000
2013-01-20,27.226334,1.542561,7.460000
2013-01-27,27.674398,1.613812,7.406667
2013-02-03,28.728661,1.679427,7.140000
...,...,...,...
2024-10-13,72.779999,82.059998,86.790001
2024-10-20,67.094002,74.139999,81.540001
2024-10-27,67.529999,73.000000,76.709999
2024-11-03,65.550003,75.449997,77.500000


Let's create a `weekly_data` dataframe by inner-joining `weekly_data_asml` and `weekly_data_suppliers` on their `Date` index, so only the weeks present in both datasets are kept.

In [5]:
# Combine both frames on their Date index; how='inner' keeps only the dates
# that appear in BOTH the ASML and the suppliers data.
weekly_data = weekly_data_asml.join(weekly_data_suppliers, how='inner')
weekly_data

Unnamed: 0_level_0,Close_ASML,Close_Carl Zeiss Meditec,Close_Lam Research Corporation,Close_Tokyo Electron Limited
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-13,56.546032,26.057859,1.512370,7.480000
2013-01-20,57.203339,27.226334,1.542561,7.460000
2013-01-27,62.106476,27.674398,1.613812,7.406667
2013-02-03,66.858612,28.728661,1.679427,7.140000
2013-02-10,67.498146,29.343645,1.713643,6.853333
...,...,...,...,...
2024-10-13,832.038757,72.779999,82.059998,86.790001
2024-10-20,721.585144,67.094002,74.139999,81.540001
2024-10-27,710.051941,67.529999,73.000000,76.709999
2024-11-03,683.830017,65.550003,75.449997,77.500000


Display the total number of nulls per column


In [6]:
# Count missing values column by column in the combined frame
nulls_per_column = weekly_data.isna().sum()

nulls_per_column

Close_ASML                        0
Close_Carl Zeiss Meditec          0
Close_Lam Research Corporation    0
Close_Tokyo Electron Limited      0
dtype: int64

### 2.5.2. Add binary variables

In [7]:
# Inclusive date windows (ISO 'YYYY-MM-DD') behind each 0/1 indicator column.
# A column may list several windows; its flag is 1 when the row's index date
# falls inside ANY of them. End dates in the future simply keep the flag at 1
# through the end of the available data.
event_windows = {
    # 1. COVID period
    'COVID_Period': [('2020-01-01', '2022-12-31')],
    # 2. Geopolitical tensions (example: start of the Ukraine conflict)
    'Geopolitical_Tension': [('2022-02-01', '2025-12-31')],
    # 3. Trade sanctions (example: US-China trade war period)
    'Trade_Sanctions': [('2018-07-01', '2029-12-31')],
    # 4. Tech regulation (example: US restrictions on semiconductor exports)
    'Tech_Regulation': [('2020-06-01', '2029-12-31')],
    # 5./6. New product launches by ASML. Both windows feed the SAME column:
    # previously the second assignment overwrote the first, silently dropping
    # the 2019 launch window from the exported data.
    'New_Product_Launch': [
        ('2019-01-01', '2019-03-31'),  # example: new EUV machine launch
        ('2023-12-01', '2025-12-31'),  # TWINSCAN EXE:5000
    ],
    # 7. Israel-Gaza conflict (end date is tentative, update as needed)
    'Israel_Gaza_Conflict': [('2023-10-07', '2025-12-31')],
    # 8. Economic stimulus (example: US CARES Act during COVID-19)
    'Economic_Stimulus': [('2020-03-01', '2020-12-31')],
}


def add_event_flags(frame, windows):
    """Return a copy of ``frame`` with one 0/1 indicator column per event.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data whose index can be compared against date strings
        (here: the DatetimeIndex built from the 'Date' column).
    windows : dict
        Maps a column name to a list of ``(start, end)`` date strings.
        Both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` with every indicator column added (or replaced,
        which makes re-running the cell idempotent). The input is not mutated.
    """
    flagged = frame.copy()
    for column, periods in windows.items():
        in_any_window = None
        for start, end in periods:
            in_window = (frame.index >= start) & (frame.index <= end)
            # OR the windows together so several periods can share one column
            in_any_window = in_window if in_any_window is None else (in_any_window | in_window)
        # A column without any window is all zeros
        flagged[column] = 0 if in_any_window is None else in_any_window.astype(int)
    return flagged


weekly_data = add_event_flags(weekly_data, event_windows)


In [8]:
# Inspect the combined frame, now including the binary indicator columns
weekly_data

Unnamed: 0_level_0,Close_ASML,Close_Carl Zeiss Meditec,Close_Lam Research Corporation,Close_Tokyo Electron Limited,COVID_Period,Geopolitical_Tension,Trade_Sanctions,Tech_Regulation,New_Product_Launch,Israel_Gaza_Conflict,Economic_Stimulus
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-01-13,56.546032,26.057859,1.512370,7.480000,0,0,0,0,0,0,0
2013-01-20,57.203339,27.226334,1.542561,7.460000,0,0,0,0,0,0,0
2013-01-27,62.106476,27.674398,1.613812,7.406667,0,0,0,0,0,0,0
2013-02-03,66.858612,28.728661,1.679427,7.140000,0,0,0,0,0,0,0
2013-02-10,67.498146,29.343645,1.713643,6.853333,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2024-10-13,832.038757,72.779999,82.059998,86.790001,0,1,1,1,1,1,0
2024-10-20,721.585144,67.094002,74.139999,81.540001,0,1,1,1,1,1,0
2024-10-27,710.051941,67.529999,73.000000,76.709999,0,1,1,1,1,1,0
2024-11-03,683.830017,65.550003,75.449997,77.500000,0,1,1,1,1,1,0


# 3. Export

In [9]:
# Export the combined weekly_data (prices + binary indicators) to a
# semicolon-separated CSV, writing the Date index as the first column
weekly_data.to_csv(output_data, index=True, sep=';')