# Clean Additional Data & Keep All iPhone Rows

In [None]:
%cd ../../../../data/p_dsi/teams2023/asurion_data/

In [138]:
import pandas as pd
import numpy as np
import copy
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

df = pd.read_excel("Asurion_data_additional.xlsx", index_col = 0)

Note: This additional data is in the same state as the original data when we first received it, that means that we will have to do the same pre-processing we did before.

## Step 1: Quick Data Investigation

In [133]:
df.isnull().sum()

phone model     0
phone size      0
phone color     0
weeks_monday    0
claim           0
dtype: int64

No null values, so no need to do any null-value handling.

In [143]:
min(df['weeks_monday']), max(df['weeks_monday'])

(Timestamp('2023-02-06 00:00:00'), Timestamp('2023-02-27 00:00:00'))

Note: The additional data includes two of the same datapoints as the original one, "2023-02-06" and "20203-02-13". We will compare values at the end and see whether they match, they need to be combined, or the original datapoints need to be replaced with these new ones.

In [144]:
df.groupby(['phone size'])['phone size'].count().sort_values(ascending=False)

phone size
128gb    543
256gb    335
64gb     335
32gb      89
512gb     66
16gb      52
4gb        1
8gb        1
Name: phone size, dtype: int64

Note: This dataset does not have any rows with missing phone size, so we can skip that step.

## Step 2: Adding Columns

Add "brand" column

In [145]:
df["brand"] = df["phone model"].str.split(" ", n = 1, expand = True)[0]
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel
161,apple iphone 11,128gb,black,2023-02-06,205,apple
162,apple iphone 11,128gb,black,2023-02-13,189,apple
...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung


Add "model" column

In [146]:
df["model"] = df["phone model"].str.split(" ", n = 1, expand = True)[1]
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip 4052r
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone 11
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone 11
...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy z fold3
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy z fold3
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy z fold3
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy z fold3


Create a new column in the data called "generation"

In [147]:
df["generation"] = df["model"].str.split(" ", n = 1, expand = True)[1]
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip 4052r,4052r
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone 11,11
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone 11,11
...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy z fold3,z fold3
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy z fold3,z fold3
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy z fold3,z fold3
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy z fold3,z fold3


In the column "model", delete the generation information

In [148]:
df["model"] = df["model"].str.split(" ", n = 1, expand = True)[0]
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11
...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z fold3
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z fold3
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z fold3
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z fold3


In the column "generation", delete the model information

In [149]:
df["generation"] = df["generation"].str.split(" ", n = 1, expand = True)[0]
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11
...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z


Create a new column in the data called "week_of_month" from the "weeks_monday" column which says which counts the monday of each week

In [150]:
df["week_of_month"] = df["weeks_monday"].dt.day.apply(lambda x: (x-1)//7+1)
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r,4
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,,4
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,,4
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2
...,...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z,4
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z,1
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z,2
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z,3


Create a new column in the data called "month" from the "weeks_monday" column

In [151]:
df["month"] = df["weeks_monday"].dt.month
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r,4,2
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,,4,2
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,,4,2
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1,2
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2,2
...,...,...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z,4,2
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z,1,2
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z,2,2
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z,3,2


Create a new column in the data called "year" from the "weeks_monday" column

In [152]:
df["year"] = df["weeks_monday"].dt.year
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month,year
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r,4,2,2023
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,,4,2,2023
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,,4,2,2023
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1,2,2023
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2,2,2023
...,...,...,...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z,4,2,2023
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z,1,2,2023
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z,2,2,2023
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z,3,2,2023


## Step 3: Merge Versions

In [139]:
df['phone model'].unique()

array(['alcatel smartflip 4052r', 'alcatel tetra', 'alcatel volta',
       'apple iphone 11', 'apple iphone 11 pro',
       'apple iphone 11 pro max', 'apple iphone 12',
       'apple iphone 12 mini', 'apple iphone 12 pro',
       'apple iphone 12 pro max', 'apple iphone 13',
       'apple iphone 13 mini', 'apple iphone 13 pro',
       'apple iphone 13 pro max', 'apple iphone 14',
       'apple iphone 14 plus', 'apple iphone 14 pro max',
       'apple iphone 4', 'apple iphone 5c', 'apple iphone 5s',
       'apple iphone 6', 'apple iphone 6 plus', 'apple iphone 6s',
       'apple iphone 6s plus', 'apple iphone 7', 'apple iphone 7 plus',
       'apple iphone 8', 'apple iphone 8 plus', 'apple iphone se',
       'apple iphone se 2020', 'apple iphone x', 'apple iphone xr',
       'apple iphone xs', 'apple iphone xs max', 'google pixel 4',
       'google pixel 5', 'google pixel 6', 'lg g6 duo', 'lg g8 thinq',
       'lg k30', 'lg k40', 'lg k92 5g', 'lg stylo 5 plus', 'lg v40 thinq',
       '

In [141]:

df['phone model'] = df['phone model'].str.replace('mini', '')
df['phone model'] = df['phone model'].str.replace('max', '')
df['phone model'] = df['phone model'].str.replace('xl', '')

df['phone model'] = df['phone model'].str.replace(' plus ', ' + ')
df['phone model'] = df['phone model'].str.replace('plus', '')
df['phone model'] = df['phone model'].str.replace('+', 'plus')
df['phone model'] = df['phone model'].str.replace('2020', '')
df['phone model'] = df['phone model'].str.replace('2022', '')

# Kyocera Durforce Pro
df['phone model'] = df['phone model'].str.replace(' pro ', ' - ')
df['phone model'] = df['phone model'].str.replace('pro', '')
df['phone model'] = df['phone model'].str.replace('-', 'pro')


df['phone model'] = df['phone model'].str.replace('5g', '')
# df['phone model'] = df['phone model'].str.replace('4a', '4')
df['phone model'] = df['phone model'].str.replace('5c', '5s')

df['phone model'] = df['phone model'].str.replace(' 32', '')
df['phone model'] = df['phone model'].str.replace(' 64', '')
df['phone model'] = df['phone model'].str.replace(' 128', '')
df['phone model'] = df['phone model'].str.replace(' 256', '')
df['phone model'] = df['phone model'].str.replace('ultra', '')
df['phone model'] = df['phone model'].str.replace('active', '')
df['phone model'] = df['phone model'].str.replace('edge', '')
df['phone model'] = df['phone model'].str.replace('10e', '10')

df['phone model'] = df['phone model'].str.strip()
df['phone model'].unique()

  df['phone model'] = df['phone model'].str.replace('+', 'plus')


array(['alcatel smartflip 4052r', 'alcatel tetra', 'alcatel volta',
       'apple iphone 11', 'apple iphone 12', 'apple iphone 13',
       'apple iphone 14', 'apple iphone 4', 'apple iphone 5s',
       'apple iphone 6', 'apple iphone 6s', 'apple iphone 7',
       'apple iphone 8', 'apple iphone se', 'apple iphone x',
       'apple iphone xr', 'apple iphone xs', 'google pixel 4',
       'google pixel 5', 'google pixel 6', 'lg g6 duo', 'lg g8 thinq',
       'lg k30', 'lg k40', 'lg k92', 'lg stylo 5', 'lg v40 thinq',
       'lg v60 thinq', 'lg velvet', 'motorola moto g stylus',
       'motorola one  ace', 'red hydrogen one', 'samsung galaxy a02s',
       'samsung galaxy a03s', 'samsung galaxy a10', 'samsung galaxy a11',
       'samsung galaxy a12', 'samsung galaxy a13', 'samsung galaxy a32',
       'samsung galaxy a51', 'samsung galaxy a71', 'samsung galaxy fold',
       'samsung galaxy j7', 'samsung galaxy note 20',
       'samsung galaxy note 5', 'samsung galaxy note 8',
       'samsung

In [153]:
len(df['phone model'].unique())

61

Add a new column in df called release 

In [154]:
df['release'] = pd.to_datetime(np.nan)
df

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month,year,release
29,alcatel smartflip 4052r,4gb,black,2023-02-27,5,alcatel,smartflip,4052r,4,2,2023,NaT
35,alcatel tetra,16gb,black,2023-02-27,1,alcatel,tetra,,4,2,2023,NaT
36,alcatel volta,16gb,gray,2023-02-27,1,alcatel,volta,,4,2,2023,NaT
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1,2,2023,NaT
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2,2,2023,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...
28693,samsung galaxy z fold3,256gb,silver,2023-02-27,22,samsung,galaxy,z,4,2,2023,NaT
28695,samsung galaxy z fold3,512gb,black,2023-02-06,58,samsung,galaxy,z,1,2,2023,NaT
28696,samsung galaxy z fold3,512gb,black,2023-02-13,100,samsung,galaxy,z,2,2,2023,NaT
28697,samsung galaxy z fold3,512gb,black,2023-02-20,94,samsung,galaxy,z,3,2,2023,NaT


In [155]:
#iphone 3gs released on June 19, 2009
df.loc[df['phone model'] == 'apple iphone 3g s', 'release'] = '2009-06-19'

#iphone 4 released on 2010-06-24
df.loc[(df['brand'] == 'apple') & (df['generation'] == '4'), 'release'] = '2010-06-24'

#for phone_model iphone 4s, set the release date to 2011-10-14
df.loc[(df['brand'] == 'apple') & (df['generation'] == '4s'), 'release'] = '2011-10-14'

#for phone_model iphone 5, set the release date to 2012-09-21
df.loc[df['phone model'] == 'apple iphone 5', 'release'] = '2012-09-21'

#for phone model iphone 5s, set the release date to 2013-09-20
df.loc[df['phone model'] == 'apple iphone 5s', 'release'] = '2013-09-10'

#iphone 5s and 5c released on 2013-09-20
df.loc[df['phone model'] == 'apple iphone 5c', 'release'] = '2013-09-10'

#iphone 6 and 6 plus released on 2014-09-19
df.loc[(df['brand'] == 'apple') & (df['generation'] == '6'), 'release'] = '2014-09-19'

#iphone 6 and 6 plus released on 2014-09-19
df.loc[(df['brand'] == 'apple') & (df['generation'] == '6 plus'), 'release'] = '2014-09-19'

#for phone model iphone 6s, set the release date to 2015-09-15
df.loc[(df['brand'] == 'apple') & (df['generation'] == '6s'), 'release'] = '2015-09-15'

#iphone se march 31,2016
df.loc[(df['brand'] == 'apple') & (df['generation'] == 'se'), 'release'] = '2016-03-31'

#for brand apple and generation 7, in the column release, set the release date to 2016-09-16
df.loc[(df['brand'] == 'apple') & (df['generation'] == '7'), 'release'] = '2016-09-16'

#for brand apple and generation 8, in the column release, set the release date to 2017-09-22
df.loc[(df['brand'] == 'apple') & (df['generation'] == '8'), 'release'] = '2017-09-22'

#for brand apple and generation x, in the column release, set the release date to 2017-11-03
df.loc[(df['brand'] == 'apple') & (df['generation'] == 'x'), 'release'] = '2017-11-03'

#for brand apple and generation xr, in the column release, set the release date to 2018-10-26
df.loc[(df['brand'] == 'apple') & (df['generation'] == 'xr'), 'release'] = '2018-10-26'

#for brand apple and generation xs, in the column release, set the release date to 2018-09-21
df.loc[(df['brand'] == 'apple') & (df['generation'] == 'xs'), 'release'] = '2018-09-21'

#for brand apple and generation 11, in the column release, add the value 2019-09-20
df.loc[(df['brand'] == 'apple') & (df['generation'] == '11'), 'release'] = '2019-09-20'

#for brand apple and generation 12, in the column release, add the value 2020-11-13
df.loc[(df['brand'] == 'apple') & (df['generation'] == '12'), 'release'] = '2020-11-13'

#for brand apple and generation 13, in the column release, add the value 2021-09-24
df.loc[(df['brand'] == 'apple') & (df['generation'] == '13'), 'release'] = '2021-09-24'

#for brand apple and generation 14, in the column release, add the value 2022-09-16
df.loc[(df['brand'] == 'apple') & (df['generation'] == '14'), 'release'] = '2022-09-16'
    
#for brand apple and generation 14 plus, in the column release, add the value 2022-10-06
df.loc[(df['brand'] == 'apple') & (df['generation'] == '14 plus'), 'release'] = '2022-10-06'


In [156]:
#in the column weeks_since_release, subtract the release date from the weeks_monday date
df['weeks_since_release'] = df['weeks_monday'] - df['release']

#make weeks_since_release into the number of weeks and days
df['weeks_since_release'] = df['weeks_since_release'].dt.days / 7

In [157]:
apple = df[df['brand'] == 'apple']
apple

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month,year,release,weeks_since_release
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1,2,2023,2019-09-20,176.428571
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2,2,2023,2019-09-20,177.428571
163,apple iphone 11,128gb,black,2023-02-20,158,apple,iphone,11,3,2,2023,2019-09-20,178.428571
164,apple iphone 11,128gb,black,2023-02-27,179,apple,iphone,11,4,2,2023,2019-09-20,179.428571
241,apple iphone 11,128gb,green,2023-02-06,43,apple,iphone,11,1,2,2023,2019-09-20,176.428571
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18607,apple iphone xs,64gb,gray,2023-02-27,46,apple,iphone,xs,4,2,2023,2018-09-21,231.428571
18684,apple iphone xs,64gb,silver,2023-02-06,12,apple,iphone,xs,1,2,2023,2018-09-21,228.428571
18685,apple iphone xs,64gb,silver,2023-02-13,11,apple,iphone,xs,2,2,2023,2018-09-21,229.428571
18686,apple iphone xs,64gb,silver,2023-02-20,17,apple,iphone,xs,3,2,2023,2018-09-21,230.428571


In [158]:
apple['release'].isnull().sum()

0

In [159]:
apple[apple['release'].isnull()]['generation'].unique()

array([], dtype=object)

In [160]:
apple['phone model'].unique()

array(['apple iphone 11', 'apple iphone 12', 'apple iphone 13',
       'apple iphone 14', 'apple iphone 4', 'apple iphone 5s',
       'apple iphone 6', 'apple iphone 6s', 'apple iphone 7',
       'apple iphone 8', 'apple iphone se', 'apple iphone x',
       'apple iphone xr', 'apple iphone xs'], dtype=object)

# Step 5: Add Holidays

Note: We use our own idea of holidays, rather than the official holiday calendar because we believe that these are the days that most affect sales and therefore claims. 

In [161]:
# create a dictionary with the holiday seasons
holidays = {
    'Christmas': ['12-25', '12-31'],
    'Black Friday': ['11-28', '11-29'],
    'Back to School': ['06-21', '09-27'],
    'Summer Vacation': ['06-01', '08-31']
}

In [162]:
apple['weeks_monday'] = pd.to_datetime(apple['weeks_monday'])

# label the holiday seasons
for holiday, date_range in holidays.items():
    start_date = f"{apple['weeks_monday'].iloc[0].year}-{date_range[0]}"
    end_date = f"{apple['weeks_monday'].iloc[0].year}-{date_range[1]}"
    apple.loc[(apple['weeks_monday'] >= start_date) & (apple['weeks_monday'] <= end_date), 'holiday_season'] = holiday

# fill the NAN with 0s and the other columns with 1s
apple['is_holiday'] = apple['holiday_season'].apply(lambda x: 0 if pd.isna(x) else 1)
apple = apple.drop('holiday_season', axis=1)

# print the resulting dataframe
apple

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apple['weeks_monday'] = pd.to_datetime(apple['weeks_monday'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apple.loc[(apple['weeks_monday'] >= start_date) & (apple['weeks_monday'] <= end_date), 'holiday_season'] = holiday
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apple['is_holiday'] = apple[

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month,year,release,weeks_since_release,is_holiday
161,apple iphone 11,128gb,black,2023-02-06,205,apple,iphone,11,1,2,2023,2019-09-20,176.428571,0
162,apple iphone 11,128gb,black,2023-02-13,189,apple,iphone,11,2,2,2023,2019-09-20,177.428571,0
163,apple iphone 11,128gb,black,2023-02-20,158,apple,iphone,11,3,2,2023,2019-09-20,178.428571,0
164,apple iphone 11,128gb,black,2023-02-27,179,apple,iphone,11,4,2,2023,2019-09-20,179.428571,0
241,apple iphone 11,128gb,green,2023-02-06,43,apple,iphone,11,1,2,2023,2019-09-20,176.428571,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18607,apple iphone xs,64gb,gray,2023-02-27,46,apple,iphone,xs,4,2,2023,2018-09-21,231.428571,0
18684,apple iphone xs,64gb,silver,2023-02-06,12,apple,iphone,xs,1,2,2023,2018-09-21,228.428571,0
18685,apple iphone xs,64gb,silver,2023-02-13,11,apple,iphone,xs,2,2,2023,2018-09-21,229.428571,0
18686,apple iphone xs,64gb,silver,2023-02-20,17,apple,iphone,xs,3,2,2023,2018-09-21,230.428571,0


In [163]:
apple[apple['is_holiday'] == 1]

Unnamed: 0,phone model,phone size,phone color,weeks_monday,claim,brand,model,generation,week_of_month,month,year,release,weeks_since_release,is_holiday


As expected there are no holidays in this dataset, since it is only the months of february and march. 

In [164]:
apple.to_excel('apple_additional_data_clean.xlsx', index=False)