### Objetivo: organizar de forma separada as informações de tempo (coluna timestamp_of_call) e de propriedade (coluna property_type)

#### 1 - Importar pacotes

In [1]:
import pandas as pd
import numpy as np

#### 2 - Pegar dados

In [2]:
    # Dataset: the CSV is read directly from its GitHub URL, so this cell needs network access
df = pd.read_csv('https://raw.githubusercontent.com/OpenClassrooms-Student-Center/Supervised-Learning/master/fe_splitting.csv')
    # Last expression of the cell: display the loaded frame
df

Unnamed: 0,borough,property_type,timestamp_of_call
0,Kensington And chelsea,Purpose Built Flats/Maisonettes - 4 to 9 storeys,01/01/2017 16:48
1,Camden,Purpose Built Flats/Maisonettes - 4 to 9 storeys,01/01/2017 22:20
2,Southwark,Purpose Built Flats/Maisonettes - 4 to 9 storeys,01/01/2017 09:51
3,Westminster,Purpose Built Flats/Maisonettes - 4 to 9 storeys,01/01/2017 00:28
4,Barking And dagenham,House - single occupancy,01/01/2017 13:33
...,...,...,...
1424,Sutton,House - single occupancy,31/03/2017 14:58
1425,Hillingdon,House - single occupancy,31/03/2017 22:41
1426,Richmond Upon thames,Purpose Built Flats/Maisonettes - Up to 3 storeys,31/03/2017 17:01
1427,Hounslow,Converted Flat/Maisonette - Up to 2 storeys,31/03/2017 11:14


#### 3 - Checar os data types

In [3]:
# Print each column's dtype and non-null count
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1429 entries, 0 to 1428
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   borough            1429 non-null   object
 1   property_type      1429 non-null   object
 2   timestamp_of_call  1429 non-null   object
dtypes: object(3)
memory usage: 33.6+ KB


#### 4 - Coluna timestamp_of_call: mudar o data type de object (texto) para datetime

In [4]:
# The raw strings are day-first (e.g. "31/03/2017 14:58"). Without dayfirst=True pandas
# assumes month-first for ambiguous dates, so "02/01/2017" would be read as 1 February
# (or, on newer pandas versions, the parse fails as soon as a day above 12 appears).
df['timestamp_of_call'] = pd.to_datetime(df['timestamp_of_call'], dayfirst=True)

    # Confirm that the column dtype is now datetime64
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1429 entries, 0 to 1428
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   borough            1429 non-null   object        
 1   property_type      1429 non-null   object        
 2   timestamp_of_call  1429 non-null   datetime64[ns]
dtypes: datetime64[ns](1), object(2)
memory usage: 33.6+ KB


#### 5 - Coluna timestamp_of_call: Acrescentar novas colunas de tempo ao data frame

In [5]:
# Map each new column name to the datetime component it is filled from
timestamps = df['timestamp_of_call']
datetime_parts = {
    'dia': 'day',
    'mês': 'month',
    'ano': 'year',
    'dia_semana': 'weekday',
    'hora': 'hour',
}
for column_name, component in datetime_parts.items():
    df[column_name] = getattr(timestamps.dt, component)

    # Check whether any column ended up with missing values
df.isna().sum()

borough              0
property_type        0
timestamp_of_call    0
dia                  0
mês                  0
ano                  0
dia_semana           0
hora                 0
dtype: int64

In [6]:
# Display the frame, now including the derived time columns
df

Unnamed: 0,borough,property_type,timestamp_of_call,dia,mês,ano,dia_semana,hora
0,Kensington And chelsea,Purpose Built Flats/Maisonettes - 4 to 9 storeys,2017-01-01 16:48:00,1,1,2017,6,16
1,Camden,Purpose Built Flats/Maisonettes - 4 to 9 storeys,2017-01-01 22:20:00,1,1,2017,6,22
2,Southwark,Purpose Built Flats/Maisonettes - 4 to 9 storeys,2017-01-01 09:51:00,1,1,2017,6,9
3,Westminster,Purpose Built Flats/Maisonettes - 4 to 9 storeys,2017-01-01 00:28:00,1,1,2017,6,0
4,Barking And dagenham,House - single occupancy,2017-01-01 13:33:00,1,1,2017,6,13
...,...,...,...,...,...,...,...,...
1424,Sutton,House - single occupancy,2017-03-31 14:58:00,31,3,2017,4,14
1425,Hillingdon,House - single occupancy,2017-03-31 22:41:00,31,3,2017,4,22
1426,Richmond Upon thames,Purpose Built Flats/Maisonettes - Up to 3 storeys,2017-03-31 17:01:00,31,3,2017,4,17
1427,Hounslow,Converted Flat/Maisonette - Up to 2 storeys,2017-03-31 11:14:00,31,3,2017,4,11


#### 6 - Coluna property_type: separar/split de informações

In [7]:
    # Split on the first hyphen only (n=1), so any further hyphen stays inside the detail text
    # instead of being dropped. expand=True returns a DataFrame with one column per piece.
    # The result is named split_parts because `property` would shadow the Python builtin.
split_parts = df['property_type'].str.split('-', n=1, expand=True)

    # New column property_types: the text before the hyphen (the kind of property).
    # The raw values are written as "type - detail", so the space next to the hyphen is stripped.
df['property_types'] = split_parts[0].str.strip()

    # New column property_details: the text after the hyphen.
    # Rows whose property_type has no hyphen get a null here.
df['property_details'] = split_parts[1].str.strip()

    # Null count per column after the split.
    # Note: property_types has no nulls; property_details has nulls because some
    # properties carry no detail.
df.isnull().sum()

borough                0
property_type          0
timestamp_of_call      0
dia                    0
mês                    0
ano                    0
dia_semana             0
hora                   0
property_types         0
property_details     143
dtype: int64

#### 7 - Remover a coluna property_type, pois suas informações já foram separadas

In [8]:
# property_type is redundant now that it has been split into property_types / property_details
df = df.drop(columns=['property_type'])

#### 8 - Data frame atualizado

In [9]:
# Display the updated frame
df

Unnamed: 0,borough,timestamp_of_call,dia,mês,ano,dia_semana,hora,property_types,property_details
0,Kensington And chelsea,2017-01-01 16:48:00,1,1,2017,6,16,Purpose Built Flats/Maisonettes,4 to 9 storeys
1,Camden,2017-01-01 22:20:00,1,1,2017,6,22,Purpose Built Flats/Maisonettes,4 to 9 storeys
2,Southwark,2017-01-01 09:51:00,1,1,2017,6,9,Purpose Built Flats/Maisonettes,4 to 9 storeys
3,Westminster,2017-01-01 00:28:00,1,1,2017,6,0,Purpose Built Flats/Maisonettes,4 to 9 storeys
4,Barking And dagenham,2017-01-01 13:33:00,1,1,2017,6,13,House,single occupancy
...,...,...,...,...,...,...,...,...,...
1424,Sutton,2017-03-31 14:58:00,31,3,2017,4,14,House,single occupancy
1425,Hillingdon,2017-03-31 22:41:00,31,3,2017,4,22,House,single occupancy
1426,Richmond Upon thames,2017-03-31 17:01:00,31,3,2017,4,17,Purpose Built Flats/Maisonettes,Up to 3 storeys
1427,Hounslow,2017-03-31 11:14:00,31,3,2017,4,11,Converted Flat/Maisonette,Up to 2 storeys
