In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('../../data/eprel/raw_data/localspaceheaters.csv')

In [4]:
column_dict = {'Type':'Brand', 'Title':'Model', 'Content': 'Direct_heat_output', 
               'eclrow4': 'Indirect_heat_output', 'Content6': 'Energy_efficiency_index', 'Image': 'Label'}
columns_drop = ['eclrow', 'eclrow3', 'eclrow5']

In [5]:
def rename_and_drop(df, column_dict, columns_drop):
    df.rename(columns=column_dict, inplace=True)
    df.drop(columns=(columns_drop), axis=1, inplace=True)
    return df

In [6]:
df = rename_and_drop(df, column_dict, columns_drop)
df.head()

Unnamed: 0,Brand,Model,Direct_heat_output,Indirect_heat_output,Energy_efficiency_index,Label
0,"BESTAR INDUSTRIAL CO.,LTD.",PTC-1500TR,15,-,116,https://eprel.ec.europa.eu/assets/images/label...
1,"BESTAR INDUSTRIAL CO.,LTD.",PTC-1500P1,12,-,110,https://eprel.ec.europa.eu/assets/images/label...
2,Dongguan Luying Plastic Hardware Products Co....,YND-2000D,19,-,114,https://eprel.ec.europa.eu/assets/images/label...
3,FULMO/IPG31,309020101,42,-,91,https://eprel.ec.europa.eu/assets/images/label...
4,FULMO/IPB30,309020102,42,-,91,https://eprel.ec.europa.eu/assets/images/label...


In [7]:
def model_brand_label(df):
    df['Model'] = df['Model'].apply(lambda x: x.lower())
    df['Brand'] = df['Brand'].apply(lambda x: x.lower())
    df['Label'] = df['Label'].apply(lambda x: x.rpartition('/')[-1][:-4])
    return df

In [8]:
def remove_spaces(df):
    df['Model'] = df['Model'].apply(lambda x: x[1:-1])
    df['Brand'] = df['Brand'].apply(lambda x: x[1:-1])
    return df

In [9]:
df = model_brand_label(df)
df = remove_spaces(df)

In [10]:
df

Unnamed: 0,Brand,Model,Direct_heat_output,Indirect_heat_output,Energy_efficiency_index,Label
0,"bestar industrial co.,ltd.",ptc-1500tr,15,-,116,AP-Left-MediumGreen
1,"bestar industrial co.,ltd.",ptc-1500p1,12,-,110,AP-Left-MediumGreen
2,"dongguan luying plastic hardware products co.,...",ynd-2000d,19,-,114,AP-Left-MediumGreen
3,fulmo/ipg31,309020101,42,-,91,A-Left-LightGreen
4,fulmo/ipb30,309020102,42,-,91,A-Left-LightGreen
...,...,...,...,...,...,...
11866,la nordica,rosetta sinistra bii liberty bordeaux,73,-,106,A-Left-LightGreen
11867,la nordica,termorosa xxl dsa nero antracite,33,151,110,AP-Left-MediumGreen
11868,dal zotto,flaminia bianco,83,-,115,AP-Left-MediumGreen
11869,house,takka 800 al etuk_,17,-,119,AP-Left-MediumGreen


In [11]:
aux_marcas = [' co.,ltd.', ' co., ltd', '/ipg31', '/ipb30', '/ipb40', ' ltd.', ' s.r.l.', ' s.p.a.', ' srl', ' gmbh']

In [12]:
def quitar_aux_marcas(aux_marcas, df):
    for marca in aux_marcas:
        df['Brand'] = df['Brand'].str.replace(marca,'')
    return df

In [13]:
df = quitar_aux_marcas(aux_marcas, df)

  This is separate from the ipykernel package so we can avoid doing imports until


In [14]:
df['Brand'].unique()

array(['bestar industrial', 'dongguan luying plastic hardware products',
       'fulmo', 'corbero', 'mcz', "l'artistico", 'hellweg', 'jolly mec',
       'freepoint', 'pegaso', 'cadel', 'ravelli', 'jack sealey',
       'strendpro', 'superior', 'piazzetta', 'fireplace', 'innofire',
       'stovax', 'edilkamin', 'barbas', 'profile', 'scan',
       'josef davidssons', 'leda werk & co. kg', 'italiana camini',
       'nobis', 'focus', 'la nordica', 'tmc', 'thermorossi', 'extraflame',
       'unical', 'famar brevetti', 'jøtul', 'c&a chama', 'lincar',
       'vulcania', 'rüegg cheminée schweiz ag', 'raydan home',
       'trimline fires', 'stanley', 'rowi', 'bordelet', 'toyotomi',
       'bilberry', 'supra', 'casatelli', 'termovana', 'dff', ' casatelli',
       'brisach', 'seguin', 'axis', 'hase', 'aduro', 'gazco',
       'lotus heating systems a/s', 'attika', 'rais', 'red', 'amg',
       'honest general trading', 'ferlux', 'villager', 'anselmo cola',
       'haas+sohn ofentechnik', 'dedra', 's