## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
data = pd.read_csv('../input/avocado-prices/avocado.csv')

In [None]:
data.head()

In [None]:
data.shape

In [None]:
data.info()

In [None]:
#missing values
data.isnull().sum()

In [None]:
## i will rename some columns for more flexibility
data.rename(columns={'AveragePrice':'avprice','Small Bags':'Sbags','Large Bags':'Lbags','XLarge Bags':'XLbags'},inplace=True)

In [None]:
data.head()

## AveragePrice analysis

In [None]:
data.avprice.describe()

In [None]:
sns.set(style='darkgrid')
plt.figure(figsize=(11,9))
a=sns.distplot(data.avprice,color='r')
a.set_xlabel('AvragePrice')
a.set_ylabel('Frequency')
plt.title('Distribution of Average Price',size=25)


## Total Volume

In [None]:
data['Total Volume'].describe()

In [None]:
plt.figure(figsize=(11,9))
a=sns.kdeplot(data['Total Volume'],color='g',shade=True)
a.set_xlabel('Total Volume')
a.set_ylabel('Frequency')
plt.title('Distribution of Total Volume',size=25)


In [None]:
data[data['Total Volume']<1500000].shape

In [None]:
data[data['Total Volume']>5000000].sort_values(by='Total Volume',ascending=False)

In [None]:
## correlation between them 
print('the correlation between AveragePrice and Total volume :',data['avprice'].corr(data['Total Volume']))

In [None]:
a=sns.jointplot(x='Total Volume',y='avprice',data=data,color='g',height=9)


In [None]:
plt.figure(figsize=(11,9))
a=sns.regplot(x='Total Volume',y='avprice',data=data[data['Total Volume']<1500000],color='c')
plt.title('Average Price vs Total Volume',size=25)

## Results :

## Region

In [None]:
data.region.unique()

In [None]:
Region=data.groupby('region').avprice.mean().reset_index().sort_values(by='avprice')


In [None]:
Region.head()

In [None]:
Region.tail()

In [None]:
# Price spread per region; tick labels are rotated so that every region
# name fits along the x-axis.
plt.figure(figsize=(11, 9))
ax = sns.boxplot(
    data=data,
    x='region',
    y='avprice',
    palette='nipy_spectral',
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha="right", size=12)
plt.title('Boxplot AveragePrice Vs Region', size=30)

## Results :

## Type

In [None]:
data.type.unique()

In [None]:
data.type.value_counts()

In [None]:
data.groupby('type').avprice.mean().reset_index()

In [None]:
# Boxen plot of the price distribution for each avocado type.
a=sns.catplot(x='type',y='avprice',data=data,palette='mako',height=10,kind='boxen')
# Title spelling corrected (was 'AverigePrive').
plt.title('Boxen plot of AveragePrice for each type',size=25)

In [None]:
# Box plot of Total Volume per type, limited to rows below 1.5 million.
a=sns.catplot(x='type',y='Total Volume',data=data[data['Total Volume']<1500000],palette='mako',height=10,kind='box')
# The plot is drawn with kind='box', so the title names a box plot.
plt.title('Box plot of Total Volume for each type',size=25)

In [None]:
# Price against volume, coloured by avocado type.
sns.relplot(
    data=data,
    x="Total Volume",
    y='avprice',
    hue='type',
    height=10,
)
plt.title('AveragePrice Vs Total Volume for each type', size=25)

In [None]:
# Horizontal bars of price per region, split by avocado type.
plt.figure(figsize=(13, 15))
ax = sns.barplot(
    data=data,
    x='avprice',
    y='region',
    hue='type',
    palette='nipy_spectral',
)
ax.set_yticklabels(ax.get_yticklabels(), size=16)
plt.title('Barplot AveragePrice Vs Region for each Type', size=30)

In [None]:
# Organic rows only, with the "TotalUS" region excluded.
organic_rows = data[data.type == 'organic']
organic_regional = organic_rows.query('region != "TotalUS"')

plt.figure(figsize=(10, 15))
ax = sns.barplot(
    data=organic_regional,
    x='Total Volume',
    y='region',
    palette='coolwarm',
)
ax.set_yticklabels(ax.get_yticklabels(), size=16)
plt.title('Total Volume for organic for each Region', size=30)

In [None]:
# Conventional rows only, with the "TotalUS" region excluded.
conventional_rows = data[data.type == 'conventional']
conventional_regional = conventional_rows.query('region != "TotalUS"')

plt.figure(figsize=(10, 15))
ax = sns.barplot(
    data=conventional_regional,
    x='Total Volume',
    y='region',
    palette='coolwarm',
)
ax.set_yticklabels(ax.get_yticklabels(), size=16)
plt.title('Total Volume for conventional for each Region', size=30)

## Results :

## Year

In [None]:
data.year.unique()

In [None]:
data.Date=pd.to_datetime(data.Date)

In [None]:
data['month']=data.Date.dt.month

In [None]:
price_years=data.groupby(['year','month','type'],as_index=False)['avprice'].mean()
price_years

## Let's look at the mean AveragePrice for each month, year by year

##  2015

In [None]:
# Monthly mean price in 2015, one line per avocado type.
plt.figure(figsize=(13,9))
# Compare `year` with an integer: a numeric column compared with the string
# "2015" matches no rows, which leaves the plot empty.
# NOTE(review): assumes `year` is an integer column - confirm with data.info().
a=sns.lineplot(x='month',y='avprice',data=price_years.query('year == 2015')
            ,hue='type',markers=True,style='type'
            ,palette='gnuplot2' )

## 2016

In [None]:
# Monthly mean price in 2016, one line per avocado type.
plt.figure(figsize=(13,9))
# Compare `year` with an integer: a numeric column compared with the string
# "2016" matches no rows, which leaves the plot empty.
# NOTE(review): assumes `year` is an integer column - confirm with data.info().
a=sns.lineplot(x='month',y='avprice',data=price_years.query('year == 2016')
            ,hue='type',markers=True,style='type'
            ,palette='gnuplot2' )

## 2017

In [None]:
# Monthly mean price in 2017, one line per avocado type.
plt.figure(figsize=(13,9))
# Compare `year` with an integer: a numeric column compared with the string
# "2017" matches no rows, which leaves the plot empty.
# NOTE(review): assumes `year` is an integer column - confirm with data.info().
a=sns.lineplot(x='month',y='avprice',data=price_years.query('year == 2017')
            ,hue='type',markers=True,style='type'
            ,palette='gnuplot2' )

## 2018

In [None]:
# Monthly mean price in 2018, one line per avocado type.
plt.figure(figsize=(13,9))
# Compare `year` with an integer: a numeric column compared with the string
# "2018" matches no rows, which leaves the plot empty.
# NOTE(review): assumes `year` is an integer column - confirm with data.info().
a=sns.lineplot(x='month',y='avprice',data=price_years.query('year == 2018')
            ,hue='type',markers=True,style='type'
            ,palette='gnuplot2' )

## Results :

## Average Price of Avocado for each Region over the four years

In [None]:
# Point plot of price per region for conventional avocados, one colour per year.
# factorplot is the deprecated name of catplot; its default was a point plot,
# so kind='point' is requested explicitly, `size` becomes `height`, and x/y
# are passed by keyword.
sns.catplot(x='avprice', y='region', data=data.query("type=='conventional'"),
            kind='point',
            hue='year',
            height=15,
            palette='tab20',
            join=False,  # draw the points only, without connecting lines
            aspect=0.7,
            )
plt.title('For Conventional',size=25)

In [None]:
# Point plot of price per region for organic avocados, one colour per year.
# factorplot is the deprecated name of catplot; its default was a point plot,
# so kind='point' is requested explicitly, `size` becomes `height`, and x/y
# are passed by keyword.
sns.catplot(x='avprice', y='region', data=data.query("type=='organic'"),
            kind='point',
            hue='year',
            height=15,
            palette='tab20',
            join=False,  # draw the points only, without connecting lines
            aspect=0.7,
            )
plt.title('For Organic',size=25)

## Results :

### Thank you! I look forward to your remarks.