In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

Insights from online pet food customer orders


Task

The main task is to generate actionable insights from this pet food customer orders dataset.
As a focus area, try to address the following questions:

    Which customers order and reorder the wet food, and when are they likely to try it?
    Are there certain characteristics of the pets, customers or their orders that impact how likely they are to purchase the wet food, and keep ordering it?
    Based on this data, what do you suggest we focus on to get more customers taking and continuing to enjoy our wet food?

Data Intro

The data set is a selection of customers and their orders, with some pet characteristics and details of what they got in each order.

Orders are numbered in sequence for each pet, and every order contains dry food, which is the core offering of all online pet food companies. Some customers also purchase wet food (the subject of this challenge) alongside their dry food, and this dataset also includes the wet food order sequence. So if a customer has 2 dry-food-only orders followed by an order containing both dry and wet food, they will have a row in this dataset with `pet_order_number = 3` and `wet_food_order_number = 1`.

This customer order dataset is derived from a subscription business. If a pet has an active subscription, the business will make and deliver an order every 31 days, which contains dry food and whatever other products the customer has on their subscription. A customer can remove wet food and treats from their orders and still keep an active subscription; the business will simply continue to make and send them their dry food.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the customer-order table from the Kaggle input directory.
orders_csv_path = '/kaggle/input/pet-food-customer-orders-online/pet_food_customer_orders.csv'
df = pd.read_csv(orders_csv_path)

In [None]:
# Print the column names, dtypes and non-null counts of the loaded orders table.
df.info()

In [None]:
# Missing-value map: one heatmap row per column of `df`, highlighted wherever the value is null.
null_mask = df.isnull().T
plt.figure(figsize=(20, 20))
sns.heatmap(null_mask, cmap='coolwarm', cbar=False, xticklabels=False)

In [None]:
# Count of non-null pet_id entries at each wet-food order number.
# Selecting the column before counting avoids counting every other column first.
Order_number_popular = df.groupby('wet_food_order_number')['pet_id'].count()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()


1. Which customers order and reorder the wet food, and when are they likely to try it?
2. Are there certain characteristics of the pets, customers or their orders that impact how likely they are to purchase the wet food, and keep ordering it?
3. Based on this data, what do you suggest we focus on to get more customers taking and continuing to enjoy our wet food?


In [None]:
# Preview the first five rows of the orders table.
df.head()

In [None]:

# Bar chart of the per-order-number counts held in Order_number_popular.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(x=Order_number_popular.index, y=Order_number_popular, ax=ax)
ax.set_ylabel('User count')
ax.set_xlabel('Food Order No.');

# Notes:
Wet food order number 1 is the most popular.

Wet food order number 20 is the least popular.


## Which customers order and reorder the wet food, and when are they likely to try it?

In [None]:
# Collect every column whose name contains 'wet', then display the list.
rec = [column for column in df.columns if 'wet' in column]
rec

In [None]:
# Work on an independent copy that holds only the wet-food related columns.
wet_columns = [
    'wet_food_order_number',
    'orders_since_first_wet_trays_order',
    'ate_wet_food_pre_tails',
    'wet_kcal',
    'wet_trays',
    'wet_food_discount_percent',
    'wet_tray_size',
    'wet_food_textures_in_order',
    'total_wet_food_updates',
    'total_wet_food_updates_since_last_order',
]
wet_food_like = df.loc[:, wet_columns].copy()

In [None]:


# Keep rows where at least one of the two wet-food order counters is present
# (i.e. drop rows only when both are missing).
wet_food_like = wet_food_like.dropna(
    subset=['wet_food_order_number', 'orders_since_first_wet_trays_order'],
    how='all',
)
wet_food_like

In [None]:
# Keep rows whose wet-food order number equals the number of orders since the
# first wet-tray order (rows with a missing value on either side never match).
counters_match = wet_food_like['wet_food_order_number'].eq(
    wet_food_like['orders_since_first_wet_trays_order']
)
wet_food_like = wet_food_like.loc[counters_match]
wet_food_like

In [None]:
# One-column table ('Total Count') with the number of non-null
# orders_since_first_wet_trays_order entries per wet-food order number.
Rating = (
    wet_food_like
    .groupby('wet_food_order_number')['orders_since_first_wet_trays_order']
    .count()
    .to_frame('Total Count')
)

In [None]:
# Display the per-order-number counts table.
Rating

In [None]:
# Bar chart of 'Total Count' for each wet-food order number in Rating.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=Rating, x=Rating.index, y='Total Count', palette='Spectral', ax=ax)
ax.set_title('People Continue to like The Food order', fontsize=20);

In [None]:
#Food_Dont_liked_By_User_Count= wet_food_like[wet_food_like['wet_food_order_number']!=wet_food_like['orders_since_first_wet_trays_order']].dropna(axis=0).groupby('wet_food_order_number').count()['orders_since_first_wet_trays_order']


## Are there certain characteristics of the pets, customers or their orders that impact how likely they are to purchase the wet food, and keep ordering it?

In [None]:
# Non-null counts of every column, grouped by the texture string recorded on each order.
df.groupby('wet_food_textures_in_order').count()

In [None]:
# Preview the first ten rows of the orders table.
df.head(10)

In [None]:
# Empty table aligned to df's index, with one column per wet-food texture;
# the columns are filled with 0/1 flags in a later cell.
texture_names = ['gravy', 'pate', 'jelly']
food_texture = pd.DataFrame(columns=texture_names, index=df.index)
food_texture
    

In [None]:
def hell(x, y='gravy'):
    """Return 1 if texture name ``y`` occurs in the texture string ``x``, else 0.

    Parameters
    ----------
    x : object
        A single value from the ``wet_food_textures_in_order`` column. Any
        value that is not a string (NaN, None, pd.NA, numpy floats, ...) is
        treated as "no texture recorded".
    y : str, default 'gravy'
        Texture name to look for; matched as a plain substring of ``x``.

    Returns
    -------
    int
        1 when ``y`` is a substring of ``x``, otherwise 0.
    """
    # Only a string can contain a texture name. The earlier `type(x) is float`
    # test recognised just the builtin-float NaN, so other missing markers
    # (None, pd.NA, ...) fell through to `y in x` and raised TypeError.
    if not isinstance(x, str):
        return 0
    return 1 if y in x else 0

# Fill each texture column with the 0/1 flag that `hell` returns for every order row.
textures_in_order = df['wet_food_textures_in_order']
for texture in ('gravy', 'jelly', 'pate'):
    food_texture[texture] = textures_in_order.apply(hell, args=(texture,))




In [None]:
# Show ten randomly chosen rows of the texture flags. The fixed seed makes the
# same rows appear on every Restart & Run All.
food_texture.sample(10, random_state=42)

In [None]:
# Column totals of the 0/1 flags, i.e. how many order rows include each texture.
food_texture.sum()

In [None]:
# Pairwise correlation between the texture flags: printed as text, then drawn as a heatmap.
texture_corr = food_texture.corr()
print(texture_corr)
sns.heatmap(texture_corr, annot=True, cmap='Reds_r')

In [None]:
# Rows where the wet-food order number equals the orders-since-first-wet-order
# count, without identifier/date columns, joined by index to the texture flags.
counters_match = df['wet_food_order_number'].eq(df['orders_since_first_wet_trays_order'])
dropped_columns = [
    'customer_id',
    'pet_id',
    'pet_signup_datetime',
    'last_customer_support_ticket_date',
    'order_payment_date',
]
Why_they_Ordering_Same_food = (
    df.loc[counters_match]
    .drop(columns=dropped_columns)
    .merge(food_texture, left_index=True, right_index=True)
)
Why_they_Ordering_Same_food

In [None]:
# Correlation heatmap of the numeric features of the selected orders.
# The frame still holds text columns (e.g. wet_food_textures_in_order), and
# DataFrame.corr() raises on those in pandas >= 2.0. Restrict to numeric and
# boolean columns explicitly, which is what older pandas selected implicitly.
numeric_features = Why_they_Ordering_Same_food.select_dtypes(include=['number', 'bool'])
plt.figure(figsize=(20,10))
sns.heatmap(numeric_features.corr(),cmap='Reds',linewidths=1,annot=True)
# Surprisingly, total_web_sessions shows a high correlation with orders_since_first_wet_trays_order.

In [None]:
# Scatter of web sessions against orders since the first wet-tray order,
# coloured by that same order count.
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(
    data=Why_they_Ordering_Same_food,
    x='total_web_sessions',
    y='orders_since_first_wet_trays_order',
    hue='orders_since_first_wet_trays_order',
    palette='Set1',
    ax=ax,
)

In [None]:
# Narrow the table down to the two columns being compared.
web_session_columns = ['orders_since_first_wet_trays_order', 'total_web_sessions']
Ralation_Websessions = Why_they_Ordering_Same_food.loc[:, web_session_columns]
Ralation_Websessions

In [None]:
# Display the rows ordered by total_web_sessions, highest first (the result is not stored).
Ralation_Websessions.sort_values('total_web_sessions',ascending=False)

In [None]:
# Total web sessions summed per value of orders_since_first_wet_trays_order.
# NOTE(review): a sum grows with the number of rows in each group; confirm
# that a per-row mean/median is not what the comparison actually needs.
sessions_by_order = Ralation_Websessions.groupby('orders_since_first_wet_trays_order')
Ralation_Websessions = sessions_by_order.sum()
Ralation_Websessions

In [None]:
# Bar chart of the summed web sessions for each orders-since-first-wet-order value.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=Ralation_Websessions,
    x=Ralation_Websessions.index,
    y='total_web_sessions',
    ax=ax,
)

In [None]:
# Line plot (the default plot kind) of the selected-orders table against its index.
Why_they_Ordering_Same_food.plot.line(figsize=(30, 30), fontsize=20)

