In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [73]:
df_train = pd.read_csv('./datasets/train.csv')
df_test = pd.read_csv('./datasets/test.csv')

In [74]:
df_train.head()
df_test.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,...,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,...,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,...,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,...,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,...,0,185,0,,,,0,7,2009,WD


In [104]:
def clean(dataframe):
    df_clean = dataframe.rename(columns = {'Id':'Id',
                                           'Lot Area':'lot_area',
                                           'Street':'street', 
                                           'Land Contour':'land_cont', 
                                           'Neighborhood':'neighborhood', 
                                           'Condition 1':'cond_1', 
                                           'Condition 2':'cond_2', 
                                           'Bldg Type':'bldg_type', 
                                           'House Style':'style',
                                           'Overall Cond':'overall_cond', 
                                           'Year Built':'yr_built', 
                                           'Year Remod/Add':'yr_remodeled',
                                           'Roof Style':'roof_style',
                                           'Exter Cond':'exter_cond', 
                                           'Foundation':'foundation',
                                           'Bsmt Cond':'bsmt_cond', 
                                           'BsmtFin Type 1':'bsmt_fin_1',
                                           'BsmtFin Type 2':'bsmt_fin_2', 
                                           'Total Bsmt SF':'bsmt_sf', 
                                           'Heating':'heat', 
                                           'Central Air':'cent_air',
                                           'Gr Liv Area':'gr_liv_area', 
                                           'Full Bath':'full_bath', 
                                           'Half Bath':'half_bath', 
                                           'Bedroom AbvGr':'bedrooms_gr',
                                           'Kitchen AbvGr':'kitchen',
                                           'Kitchen Qual':'kitch_qual',
                                           'Fireplaces':'fireplaces',
                                           'Garage Type':'garage_type',
                                           'Garage Cars':'garage_car_size',
                                           'Paved Drive':'paved_drive', 
                                           'Pool QC':'pool_qual',
                                           'Yr Sold':'year_sold'
                                           })
    included_cols = ['Id',
                     'lot_area',
                     'street',   # Secondary, removing from first model, value split of 2044 / 7
                     'land_cont',   # Value split of 1843 / 85 / 80 / 43 
                     'neighborhood',
                     'cond_1',
                     'cond_2',  # Secondary, removing from first model, value split of 2025 (Normal) / 26 (other values)
                     'bldg_type', 
                     'style',
                     'overall_cond',   # Secondary variable to investigate
                     'yr_built', 
                     'yr_remodeled', 
                     'roof_style',   # Secondary variable to investigate
                     'exter_cond', 
                     'foundation',   # Secondary variable to investigate
                     'bsmt_cond',   # Secondary, removing from first model, 1834 (Typicals) / 92 (Good | Excellent) / 70 (Fair | Poor)
                     'bsmt_fin_1',   # Secondary variable to investigate
                     'bsmt_fin_2',   # Secondary variable to investigate 
                     'bsmt_sf', 
                     'heat',   # Secondary, removing from first model, value split of 2018 (GasA) / 33 (other values)
                     'cent_air',   # Secondary variable to investigate   
                     'gr_liv_area', 
                     'full_bath', 
                     'half_bath', 
                     'bedrooms_gr', 
                     'kitchen',   # Secondary variable to investigate
                     'kitch_qual', 
                     'fireplaces',   # Secondary variable to investigate
                     'garage_type', 
                     'garage_car_size',
                     'paved_drive',   # Secondary, removing from first model, 1861 (Paved) / 39 (Partial) / 151 (Dirt/Gravel)
                     'pool_qual',   # Consider removing from first model, only 9 houses with pools
                     'year_sold'
                     ]

    if 'SalePrice' not in dataframe.columns:
        df_clean = df_clean[included_cols]
    elif 'SalePrice' in dataframe.columns:
        df_clean = df_clean[included_cols].merge(dataframe[['Id', 'SalePrice']], left_on='Id', right_on='Id', how='left')
        df_clean = df_clean.rename(columns = {'SalePrice':'sale_price'})
    return df_clean

In [110]:
df_train_cleaned = clean(df_train)
df_train_cleaned.columns
df_train_cleaned.head()

Unnamed: 0,Id,lot_area,street,land_cont,neighborhood,cond_1,cond_2,bldg_type,style,overall_cond,...,bedrooms_gr,kitchen,kitch_qual,fireplaces,garage_type,garage_car_size,paved_drive,pool_qual,year_sold,sale_price
0,109,13517,Pave,Lvl,Sawyer,RRAe,Norm,1Fam,2Story,8,...,3,1,Gd,0,Attchd,2.0,Y,,2010,130500
1,544,11492,Pave,Lvl,SawyerW,Norm,Norm,1Fam,2Story,5,...,4,1,Gd,1,Attchd,2.0,Y,,2009,220000
2,153,7922,Pave,Lvl,NAmes,Norm,Norm,1Fam,1Story,7,...,3,1,Gd,0,Detchd,1.0,Y,,2010,109000
3,318,9802,Pave,Lvl,Timber,Norm,Norm,1Fam,2Story,5,...,3,1,TA,0,BuiltIn,2.0,Y,,2010,174000
4,255,14235,Pave,Lvl,SawyerW,Norm,Norm,1Fam,1.5Fin,8,...,3,1,TA,0,Detchd,2.0,N,,2010,138500


In [109]:
df_test_cleaned = clean(df_test)
df_test_cleaned.columns
df_test_cleaned.head()

Unnamed: 0,Id,lot_area,street,land_cont,neighborhood,cond_1,cond_2,bldg_type,style,overall_cond,...,half_bath,bedrooms_gr,kitchen,kitch_qual,fireplaces,garage_type,garage_car_size,paved_drive,pool_qual,year_sold
0,2658,9142,Pave,Lvl,OldTown,Norm,Norm,2fmCon,2Story,8,...,0,4,2,Fa,0,Detchd,1,Y,,2006
1,2718,9662,Pave,Lvl,Sawyer,Norm,Norm,Duplex,1Story,4,...,0,6,2,TA,0,Attchd,2,Y,,2006
2,2414,17104,Pave,Lvl,Gilbert,Norm,Norm,1Fam,2Story,5,...,1,3,1,Gd,1,Attchd,2,Y,,2006
3,1989,8520,Pave,Lvl,OldTown,Norm,Norm,1Fam,1Story,6,...,0,2,1,TA,0,Detchd,2,N,,2007
4,625,9500,Pave,Lvl,NAmes,Norm,Norm,1Fam,1Story,5,...,1,3,1,TA,2,Attchd,2,Y,,2009


In [112]:
# Used .describe() to check mean/min/max values and ensure all numerical columns weren't missing data

df_train_cleaned.describe()

Unnamed: 0,Id,lot_area,overall_cond,yr_built,yr_remodeled,bsmt_sf,gr_liv_area,full_bath,half_bath,bedrooms_gr,kitchen,fireplaces,garage_car_size,year_sold,sale_price
count,2051.0,2051.0,2051.0,2051.0,2051.0,2050.0,2051.0,2051.0,2051.0,2051.0,2051.0,2051.0,2050.0,2051.0,2051.0
mean,1474.033642,10065.208191,5.562165,1971.708922,1984.190151,1057.987805,1499.330083,1.577279,0.371039,2.843491,1.042906,0.590931,1.776585,2007.775719,181469.701609
std,843.980841,6742.488909,1.104497,30.177889,21.03625,449.410704,500.447829,0.549279,0.501043,0.826618,0.20979,0.638516,0.764537,1.312014,79258.659352
min,1.0,1300.0,1.0,1872.0,1950.0,0.0,334.0,0.0,0.0,0.0,0.0,0.0,0.0,2006.0,12789.0
25%,753.5,7500.0,5.0,1953.5,1964.5,793.0,1129.0,1.0,0.0,2.0,1.0,0.0,1.0,2007.0,129825.0
50%,1486.0,9430.0,5.0,1974.0,1993.0,994.5,1444.0,2.0,0.0,3.0,1.0,1.0,2.0,2008.0,162500.0
75%,2198.0,11513.5,6.0,2001.0,2004.0,1318.75,1728.5,2.0,1.0,3.0,1.0,1.0,2.0,2009.0,214000.0
max,2930.0,159000.0,9.0,2010.0,2010.0,6110.0,5642.0,4.0,2.0,8.0,3.0,4.0,5.0,2010.0,611657.0


In [113]:
# Check how many null values in each column

df_train_cleaned.isnull().sum()

Id                    0
lot_area              0
street                0
land_cont             0
neighborhood          0
cond_1                0
cond_2                0
bldg_type             0
style                 0
overall_cond          0
yr_built              0
yr_remodeled          0
roof_style            0
exter_cond            0
foundation            0
bsmt_cond            55
bsmt_fin_1           55
bsmt_fin_2           56
bsmt_sf               1
heat                  0
cent_air              0
gr_liv_area           0
full_bath             0
half_bath             0
bedrooms_gr           0
kitchen               0
kitch_qual            0
fireplaces            0
garage_type         113
garage_car_size       1
paved_drive           0
pool_qual          2042
year_sold             0
sale_price            0
dtype: int64

In [121]:
# Investigate bsmt_cond and bsmt_sf for null bsmt_sf entry

# DELETE IF NOT NEEDED

# null_bsmt_cond = df_train_cleaned[df_train_cleaned['bsmt_cond'].isnull()]
# null_bsmt_sf = df_train_cleaned[df_train_cleaned['bsmt_sf'].isnull()]
# # null_bsmt_cond[['bsmt_cond', 'bsmt_sf']]
# null_bsmt_sf[['bsmt_cond', 'bsmt_sf']]

In [118]:
# Checking .value_counts() for columns

df_train_cleaned['bsmt_cond'].value_counts()

TA    1834
Gd      89
Fa      65
Po       5
Ex       3
Name: bsmt_cond, dtype: int64

In [122]:
df_train_cleaned['bsmt_cond'].sort_values()

102      Ex
1578     Ex
614      Ex
1373     Fa
306      Fa
       ... 
1859    NaN
1875    NaN
1889    NaN
1933    NaN
2010    NaN
Name: bsmt_cond, Length: 2051, dtype: object

In [123]:
# Checking datatypes for all columns

df_train_cleaned.dtypes

Id                   int64
lot_area             int64
street              object
land_cont           object
neighborhood        object
cond_1              object
cond_2              object
bldg_type           object
style               object
overall_cond         int64
yr_built             int64
yr_remodeled         int64
roof_style          object
exter_cond          object
foundation          object
bsmt_cond           object
bsmt_fin_1          object
bsmt_fin_2          object
bsmt_sf            float64
heat                object
cent_air            object
gr_liv_area          int64
full_bath            int64
half_bath            int64
bedrooms_gr          int64
kitchen              int64
kitch_qual          object
fireplaces           int64
garage_type         object
garage_car_size    float64
paved_drive         object
pool_qual           object
year_sold            int64
sale_price           int64
dtype: object

In [16]:
# plt.figure(figsize=(8,5))
# plt.scatter(df_clean['overall_cond'], df_clean['sale_price'], alpha=.25);  # Look at Pandas boxplot

In [17]:
# plt.figure(figsize=(8,5))
# plt.scatter(df_clean['yr_remodeled'], df_clean['sale_price'], alpha=.25);

In [18]:
# plt.figure(figsize=(8,5))
# plt.scatter(df_clean['gr_liv_area'], df_clean['sale_price'], alpha=.25);

In [19]:
# plt.figure(figsize=(8,5))
# plt.scatter(df_clean['bsmt_sf'], df_clean['sale_price'], alpha=.25);

In [20]:
# Created has_pool column to use binary data if house has pool and disregard condition/quality

df_clean['has_pool'] = np.where(df_clean['pool_qual'].isnull(), 0, 1)
df_clean['has_pool'].value_counts()

0    2042
1       9
Name: has_pool, dtype: int64

In [21]:
# Replace null values
# https://www.geeksforgeeks.org/python-pandas-dataframe-fillna-to-replace-null-values-in-dataframe/

df_clean['bsmt_sf'] = df_clean["bsmt_sf"].fillna(0)
df_clean["bsmt_cond"] = df_clean["bsmt_cond"].fillna('None')
df_clean["bsmt_fin_1"] = df_clean["bsmt_fin_1"].fillna('None')
df_clean["bsmt_fin_2"] = df_clean["bsmt_fin_2"].fillna('None')
df_clean["garage_type"] = df_clean["garage_type"].fillna('None')

In [126]:
# Replace garage_car_size (NaN) with median value (mean and median are essentially the same)

df_train_cleaned['garage_car_size'].notnull().median()
df_train_cleaned["garage_car_size"] = df_train_cleaned["garage_car_size"].fillna(df_train_cleaned["garage_car_size"].median())

In [23]:
# Drop the pool_qual column, already converted to has_pool

df_clean.drop(columns=['pool_qual'], inplace=True)

In [24]:
# Check for remaining null values

df_clean.isnull().sum()

lot_area            0
street              0
land_cont           0
neighborhood        0
cond_1              0
cond_2              0
bldg_type           0
style               0
overall_cond        0
yr_built            0
yr_remodeled        0
roof_style          0
exter_cond          0
foundation          0
bsmt_cond           0
bsmt_fin_1          0
bsmt_fin_2          0
bsmt_sf             0
heat                0
cent_air            0
gr_liv_area         0
full_bath           0
half_bath           0
bedrooms_gr         0
kitchen             0
kitch_qual          0
fireplaces          0
garage_type         0
garage_car_size     0
paved_drive         0
sale_price          0
year_sold           0
fe_bed_full_bath    0
has_pool            0
dtype: int64

In [25]:
# Enter cond_2 values of [RRNn, RRAn, RRNe, RRAe] for cond_1 values if cond_1 values [Artery, Feedr, Norm, PosN, or PosA]

for num in range(len(df_clean)):
    
    cond_tst_1a = df_clean['cond_2'][num] == 'RRAn'
    cond_tst_1b = df_clean['cond_2'][num] == 'RRAe'
    cond_tst_2a = df_clean['cond_2'][num] == 'RRNn'
    cond_tst_2b = df_clean['cond_2'][num] == 'RRNe'
    cond_tst_2c = df_clean['cond_1'][num] != 'RRAn'
    cond_tst_2d = df_clean['cond_1'][num] != 'RRAe'    
    gets_replaced = df_clean['cond_1'][num]
    does_replacing = df_clean['cond_2'][num]
     
    if cond_tst_1a or cond_tst_1b:
        df_clean.replace(gets_replaced, does_replacing, inplace=True)
    elif (cond_tst_2a or cond_tst_2b) and (cond_tst_2c or cond_tst_2d):
        df_clean.replace(gets_replaced, does_replacing, inplace=True)
        
        
df_clean.head()

Unnamed: 0,lot_area,street,land_cont,neighborhood,cond_1,cond_2,bldg_type,style,overall_cond,yr_built,...,kitchen,kitch_qual,fireplaces,garage_type,garage_car_size,paved_drive,sale_price,year_sold,fe_bed_full_bath,has_pool
0,13517,Pave,Lvl,Sawyer,RRAe,Norm,1Fam,2Story,8,1976,...,1,Gd,0,Attchd,2.0,Y,130500,2010,6,0
1,11492,Pave,Lvl,SawyerW,Norm,Norm,1Fam,2Story,5,1996,...,1,Gd,1,Attchd,2.0,Y,220000,2009,8,0
2,7922,Pave,Lvl,NAmes,Norm,Norm,1Fam,1Story,7,1953,...,1,Gd,0,Detchd,1.0,Y,109000,2010,3,0
3,9802,Pave,Lvl,Timber,Norm,Norm,1Fam,2Story,5,2006,...,1,TA,0,BuiltIn,2.0,Y,174000,2010,6,0
4,14235,Pave,Lvl,SawyerW,Norm,Norm,1Fam,1.5Fin,8,1900,...,1,TA,0,Detchd,2.0,N,138500,2010,6,0


In [26]:
# Mapping small neighborhoods to larger adjacent neighborhoods

df_clean['neighborhood'] = df_clean['neighborhood'].map({'NAmes':'NAmes',
                                                         'CollgCr':'CollgCr',
                                                         'OldTown':'OldTown',
                                                         'Edwards':'Edwards',
                                                         'Somerst':'Somerst',
                                                         'NridgHt':'NridgHt',
                                                         'Gilbert':'Gilbert',
                                                         'Sawyer':'Sawyer',
                                                         'SawyerW':'SawyerW',
                                                         'Mitchel':'Mitchel',
                                                         'BrkSide':'BrkSide',
                                                         'Crawfor':'Crawfor',
                                                         'IDOTRR':'IDOTRR',
                                                         'Timber':'Timber',
                                                         'NoRidge':'NoRidge',
                                                         'StoneBr':'StoneBr',
                                                         'SWISU':'SWISU',
                                                         'ClearCr':'ClearCr',
                                                         'MeadowV':'MeadowV',
                                                         'Blmngtn':'Blmngtn',
                                                         'BrDale':'BrDale',
                                                         'Veenker':'Veenker',
                                                         'NPkVill':'NPkVill',
                                                         'Blueste':'Crawfor',
                                                         'Greens':'Somerst',
                                                         'GrnHill':'Timber',
                                                         'Landmrk':'Somerst'
                                                        })

In [27]:
# Mapping style to groups

df_clean['style'] = df_clean['style'].map({'1Story':'1Story',
                                           '2Story':'2Story',
                                           '1.5Fin':'Fin',
                                           'SLvl':'SLvl',
                                           'SFoyer':'SFoyer',
                                           '2.5Unf':'Unfin',
                                           '1.5Unf':'Unfin',
                                           '2.5Fin':'Fin'
                                          })

In [28]:
# Mapping overall_cond to combine three lowest values

df_clean['overall_cond'] = df_clean['overall_cond'].map({1:3,
                                                         2:3,
                                                         3:3,
                                                         4:4,
                                                         5:5,
                                                         6:6,
                                                         7:7,
                                                         8:8,
                                                         9:9
                                                        })

In [29]:
# Mapping roof_style so small groups go to Other

df_clean['roof_style'] = df_clean['roof_style'].map({'Gable':'Gable',
                                                     'Hip':'Hip',
                                                     'Flat':'Other',
                                                     'Gambrel':'Other',
                                                     'Mansard':'Other',
                                                     'Shed':'Other'
                                                    })

In [30]:
# Mapping exter_cond to combine like categories

df_clean['exter_cond'] = df_clean['exter_cond'].map({'TA':'TA',
                                                     'Gd':'Gd',
                                                     'Fa':'Fa',
                                                     'Ex':'Gd',
                                                     'Po':'Fa'
                                                    })

In [31]:
# Mapping foundation to combine smaller categories to Other

df_clean['foundation'] = df_clean['foundation'].map({'PConc':'PConc',
                                                     'CBlock':'CBlock',
                                                     'BrkTil':'BrkTil',
                                                     'Slab':'Other',
                                                     'Stone':'Other',
                                                     'Wood':'Other'
                                                    })

In [32]:
# Mapping exter_cond to combine like categories

df_clean['bsmt_cond'] = df_clean['bsmt_cond'].map({'TA':'TA',
                                                   'Gd':'Gd',
                                                   'Fa':'Fa',
                                                   'Ex':'Gd',
                                                   'Po':'Fa',
                                                   'None':'None'
                                                    })

In [33]:
# Mapping heating to group non-gas options

df_clean['heat'] = df_clean['heat'].map({'GasA':'GasA',
                                         'GasW':'GasW',
                                         'Wall':'NonGas',
                                         'Grav':'NonGas',
                                         'OthW':'NonGas'
                                        })

In [34]:
# Mapping kitchen to groups

df_clean['kitchen'] = df_clean['kitchen'].map({1:1,
                                               0:1,
                                               3:2,
                                               2:2
                                              })

In [35]:
# Mapping fireplaces to groups

df_clean['fireplaces'] = df_clean['fireplaces'].map({0:0,
                                                     1:1,
                                                     2:2,
                                                     3:2,
                                                     4:2
                                                    })

In [36]:
# Mapping garage_car_size to groups

df_clean['garage_car_size'] = df_clean['garage_car_size'].map({0:0,
                                                               1:1,
                                                               2:2,
                                                               3:3,
                                                               4:3,
                                                               5:3
                                                              })

In [37]:
# Mapping kitch_qual to combine like categories

df_clean['kitch_qual'] = df_clean['kitch_qual'].map({'TA':'TA',
                                                     'Gd':'Gd',
                                                     'Fa':'Fa',
                                                     'Ex':'Ex',
                                                     'Po':'Fa',
                                                      })

In [38]:
# Mapping cond_1 to combine like categories

df_clean['cond_1'] = df_clean['cond_1'].map({'Norm':'Norm',
                                             'Feedr':'Artery',
                                             'Artery':'Artery',
                                             'RRAn':'RRA',
                                             'PosN':'Fa',
                                             'PosA':'Fa',
                                             'RRAe':'RRA',
                                             'RRNn':'RRN',
                                             'RRNe':'RRN'
                                            })

In [54]:
# Check value_counts for all categorical data

# street               # DONE          DON'T USE, 2044 / 7 feature split
# land_cont            # DONE          Max: 1843 / Min: 43
# neighborhood         # DONE          Max: 310 / Min: 1, 2, 3, 6; NEED TO CHECK MAP AND GROUP SMALL NUMBER
# cond_1               # DONE          Combined RRNn/RRNe, RRAn/RRAe
# cond_2               # DONE          DO NOT USE
# bldg_type            # DONE          Max: 1700 / Min: 46
# style                # DONE          COMBINE 1.5Fin and 2.5Fin, 1.5Unf and 2.5Unf
# overall_cond         # DONE          Max: 1168 / Min: 4; COMBINE 1, 2, 3 values
# roof_style           # DONE          Max: 1619 / Min: 3; DON'T USE AT FIRST
# exter_cond           # DONE          COMBINE Gd and Ex, Po and Fa
# foundation           # DONE          Max: 926 / Min: 2, 5; CONSIDER COMBINING Wood and Stone
# bsmt_cond            # DONE          COMBINE Gd and Ex, Po and Fa
# bsmt_fin_1           # DONE          Max: 615 / Min: 102
# bsmt_fin_2           # DONE          DON'T USE; Max: 1749 / Min: 23 
# bsmt_sf              # DONE          USE AS NUMERICAL
# heat                 # DONE          COMBINE NON-GAS METHODS (Wall, Grav, OthW)
# cent_air             # DONE          Y: 1910 / N: 141
# kitchen              # DONE          COMBINE 0 and 1, 2 and 3
# kitch_qual           # DONE          USE AS IS
# fireplaces           # DONE          COMBINE 2 and 3 and 4
# garage_type          # DONE          USE AS IS
# garage_car_size      # DONE          COMBINE 3 and 4 and 5
# paved_drive          # DONE          DON'T USE AT FIRST, USE AS IS
# has_pool             # DONE          0: 2042 / 1: 9
# sale_price           # DONE          Y target

df_clean['overall_cond'].value_counts()

5    1168
6     368
7     270
8     101
4      70
3      45
9      29
Name: overall_cond, dtype: int64

In [48]:
pd.set_option('display.max_rows', None)

In [39]:
# df_clean.to_csv('./datasets/test_cleaned.csv')
df_clean.to_csv('./datasets/train_cleaned.csv')

In [40]:
df_clean['cond_1'].head(50)

0       Norm
1       Norm
2       Norm
3       Norm
4       Norm
5       Norm
6       Norm
7       Norm
8       Norm
9       Norm
10      Norm
11      Norm
12      Norm
13    Artery
14      Norm
15    Artery
16      Norm
17      Norm
18        Fa
19    Artery
20      Norm
21    Artery
22      Norm
23      Norm
24      Norm
25      Norm
26    Artery
27    Artery
28    Artery
29    Artery
30      Norm
31      Norm
32      Norm
33      Norm
34      Norm
35       RRA
36      Norm
37      Norm
38      Norm
39      Norm
40      Norm
41      Norm
42      Norm
43      Norm
44      Norm
45      Norm
46      Norm
47      Norm
48      Norm
49    Artery
Name: cond_1, dtype: object