### Add / Modify columns

#### 0. Load data

In [1]:
import pandas as pd
walt = pd.read_csv('datasets/WALMART_SALES_DATA.csv')

display(walt.head(10))

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,1,05-02-2010,1643690.9,0,42.31,2.572,211.096358,8.106
1,1,12-02-2010,1641957.44,1,38.51,2.548,211.24217,8.106
2,1,19-02-2010,1611968.17,0,39.93,2.514,211.289143,8.106
3,1,26-02-2010,1409727.59,0,46.63,2.561,211.319643,8.106
4,1,05-03-2010,1554806.68,0,46.5,2.625,211.350143,8.106
5,1,12-03-2010,1439541.59,0,57.79,2.667,211.380643,8.106
6,1,19-03-2010,1472515.79,0,54.58,2.72,211.215635,8.106
7,1,26-03-2010,1404429.92,0,51.45,2.732,211.018042,8.106
8,1,02-04-2010,1594968.28,0,62.27,2.719,210.82045,7.808
9,1,09-04-2010,1545418.53,0,65.86,2.77,210.622857,7.808


#### 1. Adding new column

In [2]:
# check for descriptives
walt['Weekly_Sales'].describe()

count    6.435000e+03
mean     1.046965e+06
std      5.643666e+05
min      2.099862e+05
25%      5.533501e+05
50%      9.607460e+05
75%      1.420159e+06
max      3.818686e+06
Name: Weekly_Sales, dtype: float64

In [3]:
walt.columns

Index(['Store', 'Date', 'Weekly_Sales', 'Holiday_Flag', 'Temperature',
       'Fuel_Price', 'CPI', 'Unemployment'],
      dtype='object')

In [4]:
# TAX rate is 9% 
walt['TAX'] = walt['Weekly_Sales']*0.09
display(walt['TAX'].describe())
print(walt.columns)

count      6435.000000
mean      94226.838981
std       50792.995985
min       18898.762500
25%       49801.509450
50%       86467.143600
75%      127814.279400
max      343681.780500
Name: TAX, dtype: float64

Index(['Store', 'Date', 'Weekly_Sales', 'Holiday_Flag', 'Temperature',
       'Fuel_Price', 'CPI', 'Unemployment', 'TAX'],
      dtype='object')


In [5]:
# Total sales incl. tax
walt['Tot_Sales'] = walt['Weekly_Sales'] + walt['TAX']
display(walt[['Weekly_Sales', 'TAX', 'Tot_Sales']].describe())

Unnamed: 0,Weekly_Sales,TAX,Tot_Sales
count,6435.0,6435.0,6435.0
mean,1046965.0,94226.838981,1141192.0
std,564366.6,50792.995985,615159.6
min,209986.2,18898.7625,228885.0
25%,553350.1,49801.50945,603151.6
50%,960746.0,86467.1436,1047213.0
75%,1420159.0,127814.2794,1547973.0
max,3818686.0,343681.7805,4162368.0


In [6]:
display(walt[['Weekly_Sales', 'TAX', 'Tot_Sales']].head())

Unnamed: 0,Weekly_Sales,TAX,Tot_Sales
0,1643690.9,147932.181,1791623.0
1,1641957.44,147776.1696,1789734.0
2,1611968.17,145077.1353,1757045.0
3,1409727.59,126875.4831,1536603.0
4,1554806.68,139932.6012,1694739.0


In [7]:
# do the same using lambda function 
walt['total2'] = walt['Weekly_Sales'].apply(lambda s:s+(s*0.09))

In [8]:
walt[['total2', 'Tot_Sales']]

Unnamed: 0,total2,Tot_Sales
0,1.791623e+06,1.791623e+06
1,1.789734e+06,1.789734e+06
2,1.757045e+06,1.757045e+06
3,1.536603e+06,1.536603e+06
4,1.694739e+06,1.694739e+06
...,...,...
6430,7.773596e+05,7.773596e+05
6431,7.994660e+05,7.994660e+05
6432,8.005662e+05,8.005662e+05
6433,7.827568e+05,7.827568e+05


In [9]:
# drop total2 and TAX from walt dataframe
walt = walt.drop(columns=['TAX'])
walt.head()

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Tot_Sales,total2
0,1,05-02-2010,1643690.9,0,42.31,2.572,211.096358,8.106,1791623.0,1791623.0
1,1,12-02-2010,1641957.44,1,38.51,2.548,211.24217,8.106,1789734.0,1789734.0
2,1,19-02-2010,1611968.17,0,39.93,2.514,211.289143,8.106,1757045.0,1757045.0
3,1,26-02-2010,1409727.59,0,46.63,2.561,211.319643,8.106,1536603.0,1536603.0
4,1,05-03-2010,1554806.68,0,46.5,2.625,211.350143,8.106,1694739.0,1694739.0


#### 2. Modifying existing column

In [10]:
# change data type: str, int, float, bool
walt.dtypes

Store             int64
Date             object
Weekly_Sales    float64
Holiday_Flag      int64
Temperature     float64
Fuel_Price      float64
CPI             float64
Unemployment    float64
Tot_Sales       float64
total2          float64
dtype: object

In [11]:
# change Store to be str 
walt['Store'] = walt['Store'].astype(str)
walt['Store']

0        1
1        1
2        1
3        1
4        1
        ..
6430    45
6431    45
6432    45
6433    45
6434    45
Name: Store, Length: 6435, dtype: object

In [12]:
# change Date to datetime
walt['Date'] = pd.to_datetime(walt['Date'])
walt['Date'].head(3)

0   2010-05-02
1   2010-12-02
2   2010-02-19
Name: Date, dtype: datetime64[ns]