# Importing required libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame
import seaborn as sns

### Taking a sample of 50k rows for visualization

In [None]:
# Number of rows drawn from each large table before plotting.
sample_size = 50_000

# Reading data

In [None]:
# Load the full training table, then keep a random subset so the plots stay fast.
data = pd.read_csv("../input/restaurant-recommendation-challenge/train_full.csv")
# Clamp the request: DataFrame.sample raises ValueError when asked for more
# rows than the frame holds (sampling is without replacement by default).
data = data.sample(min(sample_size, len(data)))

In [None]:
# (rows, columns) of the sampled training table.
data.shape

In [None]:
# Tally of column dtypes: dtype -> number of columns (filled by the loop in the next cell).
x ={}

In [None]:
# Count how many columns share each dtype, accumulating into the `x` dict.
for column_dtype in data.dtypes:
    x[column_dtype] = x.get(column_dtype, 0) + 1

In [None]:
# Display the current value of `x`.
x

In [None]:
# Per-column number of missing values. `.sum()` adds up the True entries of
# the boolean mask; `.count()` would only report the row count for every
# column, because the mask itself never contains NaN.
x = data.isna().sum()

In [None]:
# Preview the first rows of the sampled training table.
data.head()

In [None]:
# Frequency of each distinct value in the `target` column.
data.target.value_counts()

In [None]:
# Column names, non-null counts, and dtypes of the training sample.
data.info()

In [None]:
# Load the orders table and keep a random subset for plotting.
orders = pd.read_csv("../input/restaurant-recommendation-challenge/orders.csv")
# Clamp the request: DataFrame.sample raises ValueError when asked for more
# rows than the frame holds (sampling is without replacement by default).
orders = orders.sample(min(sample_size, len(orders)))

In [None]:
# Load the vendors table in full (it is not sub-sampled here).
vendors = pd.read_csv("../input/restaurant-recommendation-challenge/vendors.csv")


# Data Insights

In [None]:
# Preview the first rows of the training sample.
data.head()

In [None]:
# Preview the first rows of the orders sample.
orders.head()

In [None]:
# Preview the first rows of the vendors table.
vendors.head()

In [None]:
# Summary statistics for the training sample.
data.describe()

In [None]:
# Histograms of the status_x and verified_x columns, one panel each.
data.hist(column=['status_x', 'verified_x'], grid=False)
plt.tight_layout()
plt.show()

In [None]:
# Plotting distribution of gender and location_type

fig, ax = plt.subplots(1, 2)
# Label each wedge from the value_counts() index so label and size always
# belong together. A hard-coded ['Male', 'Female'] list assumes both the
# frequency order and that exactly two categories are present; pie() raises
# ValueError when the label count differs from the number of wedges.
gender_counts = data.gender.value_counts()
ax[0].pie(gender_counts, labels=gender_counts.index)
data['location_type'].hist(ax=ax[1], grid=False)
plt.tight_layout()
plt.show()

In [None]:
# Plotting latitudes and longitudes of customers data

df = data[["latitude_x","longitude_x"]]

# shapely's Point takes (x, y), i.e. (longitude, latitude). Passing latitude
# first would transpose every location on the map.
geometry = [Point(lon_lat) for lon_lat in zip(df['longitude_x'], df['latitude_x'])]
gdf = GeoDataFrame(df, geometry=geometry)

# Simple world basemap that ships with geopandas.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in
# 1.0 — confirm the installed version still provides it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
gdf.plot(ax=world.plot(figsize=(20,10)), marker='o', color='red', markersize=15);

In [None]:
# Plotting latitude and longitude of vendors data (the *_y columns of the training table)

df = data[["latitude_y","longitude_y"]]

# shapely's Point takes (x, y), i.e. (longitude, latitude). Passing latitude
# first would transpose every location on the map.
geometry = [Point(lon_lat) for lon_lat in zip(df['longitude_y'], df['latitude_y'])]
gdf = GeoDataFrame(df, geometry=geometry)

# Simple world basemap that ships with geopandas.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in
# 1.0 — confirm the installed version still provides it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
gdf.plot(ax=world.plot(figsize=(20,10)), marker='o', color='red', markersize=15);

In [None]:
# Distinct values present in the location_number column.
data.location_number.unique()

In [None]:
# Discount given by vendors

# Take the labels from the same value_counts() result that supplies the wedge
# sizes. unique() lists values in order of first appearance (and includes
# NaN), so it does not line up with the frequency-ordered counts.
discount_counts = data.discount_percentage.value_counts()
# Repeat the offset pattern to fit however many categories the sample holds;
# pie() raises ValueError if len(explode) differs from the number of wedges.
explode_pattern = [0.1, 0.2, 0, 0.1, 0]
discount_explode = [explode_pattern[i % len(explode_pattern)] for i in range(len(discount_counts))]
plt.pie(discount_counts, labels=discount_counts.index, autopct='%1.0f%%', explode=discount_explode)
plt.show()

In [None]:
# Target distribution

# Labels come from the value_counts() index so each wedge is paired with its
# own class. unique() is ordered by first appearance, not by frequency.
target_counts = data.target.value_counts()
# Repeat the offset pattern to fit the number of classes actually present;
# pie() raises ValueError if len(explode) differs from the number of wedges.
explode_pattern = [0.1, 0]
target_explode = [explode_pattern[i % len(explode_pattern)] for i in range(len(target_counts))]
plt.pie(target_counts, labels=target_counts.index, autopct='%1.0f%%', explode=target_explode)
plt.show()

In [None]:
# Histogram of vendor preparation time, drawn without grid lines.
data.prepration_time.hist(color='red', grid=False)

In [None]:
# Correlation matrix of training data
# Restrict to numeric/bool columns before calling corr(): since pandas 2.0,
# corr() no longer drops non-numeric columns silently and raises on them.
sns.heatmap(data.select_dtypes(include=['number', 'bool']).corr())

In [None]:
# Summary statistics for the orders sample.
orders.describe()

In [None]:
# Preview the first rows of the orders sample.
orders.head()

In [None]:
# Modes of payment used
# Labels come from the value_counts() index so each wedge is paired with its
# own payment mode. unique() is ordered by first appearance, not by frequency.
payment_counts = orders.payment_mode.value_counts()
# Repeat the offset pattern to fit the number of modes actually present;
# pie() raises ValueError if len(explode) differs from the number of wedges.
explode_pattern = [0.1, 0.2, 0, 0.1, 0]
payment_explode = [explode_pattern[i % len(explode_pattern)] for i in range(len(payment_counts))]
plt.pie(payment_counts, labels=payment_counts.index, autopct='%1.0f%%', explode=payment_explode)

plt.show()

In [None]:
# Distributions of the order value and the item count, side by side.
fig, ax = plt.subplots(1, 2)
# (column, colour) per panel; None keeps seaborn's default colour.
panels = (('grand_total', 'gold'), ('item_count', None))
for panel_ax, (column, colour) in zip(ax, panels):
    sns.distplot(orders[column], ax=panel_ax, color=colour)
plt.tight_layout()
plt.show()

In [None]:
# Correlation matrix of the orders sample.
# Restrict to numeric/bool columns before calling corr(): since pandas 2.0,
# corr() no longer drops non-numeric columns silently and raises on them.
sns.heatmap(orders.select_dtypes(include=['number', 'bool']).corr())
plt.show()

In [None]:
# Scatter plots of item count and order total against delivery date.

fig, ax = plt.subplots(1, 2)
# Parse the date strings in place, then build the shared x-axis index once.
orders['delivery_date'] = pd.to_datetime(orders['delivery_date'])
delivery_dates = pd.Index(orders['delivery_date'])

# Left panel: number of items per order.
ax[0].scatter(delivery_dates, orders['item_count'],
              color='red', alpha=0.6, label='items')
ax[0].legend()

# Right panel: amount paid per order.
ax[1].scatter(delivery_dates, orders['grand_total'],
              color='green', alpha=0.6, label='total pay')
ax[1].legend()

plt.tight_layout()
plt.show()

In [None]:
# Mean order value per customer, one dot per customer.
# Select grand_total before aggregating: averaging every column is wasted work
# and, since pandas 2.0, mean() raises TypeError on non-numeric columns.
orders.groupby('customer_id')['grand_total'].mean().plot(marker='.', linestyle='none', color='gold')
plt.title('total cost');

In [None]:
# Plotting latitude and longitude from the vendors table

df = vendors[["latitude","longitude"]]

# shapely's Point takes (x, y), i.e. (longitude, latitude). Passing latitude
# first would transpose every location on the map.
geometry = [Point(lon_lat) for lon_lat in zip(df['longitude'], df['latitude'])]
gdf = GeoDataFrame(df, geometry=geometry)

# Simple world basemap that ships with geopandas.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in
# 1.0 — confirm the installed version still provides it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
gdf.plot(ax=world.plot(figsize=(20,10)), marker='o', color='red', markersize=15);

In [None]:
# Share of each one_click_vendor value among vendors.
plt.title("one_click_vendor")
# Label wedges from the value_counts() index. The bare string 'Y' is read by
# pie() as a one-element label sequence, so it only works when the column
# holds exactly one distinct value.
one_click_counts = vendors.one_click_vendor.value_counts()
plt.pie(one_click_counts, labels=one_click_counts.index)
plt.show()

In [None]:
# Share of each city_id value among vendors.
plt.title("city_id")
# Label wedges from the value_counts() index. A hard-coded single label makes
# pie() raise ValueError as soon as more than one city is present.
city_counts = vendors.city_id.value_counts()
plt.pie(city_counts, labels=city_counts.index)
plt.show()