# Importação de Bibliotecas e Carregamento dos Dados

In [None]:
import pandas as pd
import re
from google.colab import drive
import plotly.express as px


# Location where Colab exposes Google Drive, and the training CSV inside it.
DRIVE_MOUNT_POINT = '/content/drive'
TRAIN_CSV_PATH = '/content/drive/MyDrive/FTC - Analise de Dados/train.csv'

# Attach Google Drive to the Colab filesystem so the CSV path below resolves.
drive.mount(DRIVE_MOUNT_POINT)

# Read the raw dataset once and keep it under its own name.
df_raw = pd.read_csv(TRAIN_CSV_PATH)

# Last expression of the cell: preview the first rows.
df_raw.head()

Mounted at /content/drive


Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:30:00,11:45:00,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:00,19:50:00,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,19-03-2022,08:30:00,08:45:00,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:00:00,18:10:00,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:30:00,13:45:00,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30


In [None]:
# Data cleaning: builds `df` from `df_raw` with trimmed text columns, typed
# numeric/date columns and rows carrying missing-value placeholders removed.

# Work on a copy so the frame read from disk (`df_raw`) is never modified.
df = df_raw.copy()

# Strip surrounding whitespace from the identifier columns.
df['ID'] = df['ID'].str.strip()
df['Delivery_person_ID'] = df['Delivery_person_ID'].str.strip()

# Drop the rows whose courier age is the textual placeholder 'NaN '
# (conditional selection). Despite its name, the mask is True for the rows
# that are KEPT. The explicit .copy() makes the filtered frame independent,
# so the column assignments below do not trigger SettingWithCopyWarning.
linhas_vazias = df['Delivery_person_Age'] != 'NaN '
df = df.loc[linhas_vazias, :].copy()

# Text -> integer.
df['Delivery_person_Age'] = df['Delivery_person_Age'].astype(int)

# Text -> float.
df['Delivery_person_Ratings'] = df['Delivery_person_Ratings'].astype(float)

# Text -> datetime (day-month-year).
df['Order_Date'] = pd.to_datetime(df['Order_Date'], format='%d-%m-%Y')

# Drop the rows whose multiple_deliveries value is the placeholder 'NaN ',
# then convert the column to integer.
linhas_vazias = df['multiple_deliveries'] != 'NaN '
df = df.loc[linhas_vazias, :].copy()
df['multiple_deliveries'] = df['multiple_deliveries'].astype(int)

# Rebuild a 0..n-1 index after the row filters above. The filters further
# down remove more rows and will leave gaps in the index again.
df = df.reset_index(drop=True)

# Keep only the number from Time_taken(min) (e.g. '(min) 24' -> 24).
# The previous re.findall call stored a list of strings such as ['24'] in
# every cell, which cannot be used as a number; extract the first run of
# digits and convert it to an integer instead.
df['Time_taken(min)'] = (
    df['Time_taken(min)']
    .str.extract(r'(\d+)', expand=False)
    .astype(int)
)

# Strip surrounding whitespace from the categorical text columns.
df['Festival'] = df['Festival'].str.strip()
df['City'] = df['City'].str.strip()
df['Road_traffic_density'] = df['Road_traffic_density'].str.strip()

# NOTE(review): Festival and Road_traffic_density are stripped but, unlike
# City and Weatherconditions below, never filtered for a 'NaN' placeholder.
# If those columns carry it too, such rows remain in `df` - confirm intent.

# Drop the rows whose City is the (already stripped) placeholder 'NaN'.
df = df.loc[df['City'] != 'NaN']

# Drop the rows whose weather condition is the placeholder 'conditions NaN'.
df = df.loc[df['Weatherconditions'] != 'conditions NaN']

# Finally drop the rows holding real missing values (NaN/NaT) in any column.
df = df.dropna()

# As novas perguntas de negócio

In [None]:
# Analysis frame: a new frame with every column of `df` plus `week_of_year`,
# the order date formatted with "%U" (week number of the year, as text).
df1 = df.assign(week_of_year=df['Order_Date'].dt.strftime("%U"))

## 1. Desenhe um gráfico de pizza com a média de idade dos entregadores por cidade.

In [None]:
# Mean courier age per city: one row per city, columns City / Delivery_person_Age.
aux = df1.groupby('City', as_index=False)['Delivery_person_Age'].mean()

# Pie chart with one slice per city, sized by that city's mean age.
px.pie(aux, names='City', values='Delivery_person_Age')

## 2. Desenhe um gráfico de linha, mostrando o número total de entregas diárias feitas por densidade de tráfego igual a “Low” e “Jam”

In [None]:
# Keep only the orders placed under 'Low' or 'Jam' traffic, then count the
# order IDs for each calendar day (both densities are counted together).
density = (
    df1.loc[df1['Road_traffic_density'].isin(['Low', 'Jam'])]
    .groupby('Order_Date')['ID']
    .count()
    .reset_index(name='Numbers_of_deliveries')
)

# Line chart of the daily delivery count.
px.line(
    density,
    x='Order_Date',
    y='Numbers_of_deliveries',
    title='Numbers of deliveries per day',
)

## 3. Desenhe um gráfico de barras, mostrando as avaliações médias das entregas por semana.

In [None]:
# Mean courier rating for each week of the year, rounded to 4 decimals.
week_ratings = (
    df1.groupby('week_of_year')['Delivery_person_Ratings']
    .mean()
    .round(4)
    .reset_index(name='delivery_ratings_mean')
)

# Bar chart: one bar per week, coloured by week, with the value printed on it.
px.bar(
    week_ratings,
    x='week_of_year',
    y='delivery_ratings_mean',
    title='Delivery Ratings per Week',
    color='week_of_year',
    text_auto=True,
)

## 4. Desenhe um gráfico de pizza, mostrando a média de avaliações das entregas feitas por condições climáticas.

In [None]:
# Mean courier rating for each weather condition, rounded to 3 decimals,
# with the value column renamed to 'Ratings'.
weather_ratings = (
    df1.groupby('Weatherconditions', as_index=False)['Delivery_person_Ratings']
    .mean()
    .round(3)
    .rename(columns={'Delivery_person_Ratings': 'Ratings'})
)

# Pie chart with one slice per weather condition, sized by its mean rating.
px.pie(
    weather_ratings,
    values='Ratings',
    names='Weatherconditions',
    title='Ratings in Weather conditions',
)