In [1]:
import numpy as np
import pandas as pd

In [2]:
def create_df(missing=False, n=10):
    itemid   = np.random.randint(100000, size=n)+1000
    category = np.random.randint(3, size=n)
    price    = np.round(np.random.normal(loc=100, scale=10, size=n),2)
    margin   = np.round(np.random.normal(loc=10, scale=1, size=n),2)
    
    if missing:
        nmissing = np.random.randint(len(price)//2)+2                                     
        price[np.random.permutation(len(price))[:nmissing]] = np.nan
    
    d = pd.DataFrame(np.r_[[price, category, margin]].T, index=itemid, columns=["price", "category", "margin"])
    d.index.name="itemid"
    if np.random.random()>.5:
        d = d[d.columns[:2]]
        
    return d

In [4]:
d = create_df()
d

Unnamed: 0_level_0,price,category
itemid,Unnamed: 1_level_1,Unnamed: 2_level_1
95476,86.67,2.0
92336,96.84,2.0
33374,93.27,2.0
80526,116.21,0.0
74008,111.97,2.0
88940,102.49,2.0
35493,118.7,2.0
48943,83.39,0.0
50444,110.35,1.0
19125,103.59,0.0


# Task 1: Group statistics

Complete the following function so that it returns a dataframe with the average, max and min **prices** per category. 

For instance, for the following dataframe

             price  category  margin
    itemid                          
    17946    93.85       1.0   10.64
    61190    91.72       1.0    9.76
    39639   100.16       1.0   10.67
    17791   110.44       2.0    9.65
    7333    101.05       1.0    9.69
    77362   122.33       0.0   11.14
    92646   108.13       2.0   10.58
    27797    85.52       2.0   10.88
    31746    97.56       0.0    9.75
    12355   101.04       2.0    9.51
    
you should return the following dataframe

                 media  maximo  minimo
    categoria                         
    0         109.9450  122.33  97.56
    1          96.6950  101.05  91.72
    2         101.2825  110.44  85.52
    
observe that your result
- must not be a **multilevel** columnset.
- the column names and the index name must be **exactly** as in the example.
- the **index** must be of type **int**.

In [60]:
def get_stats(df):
    mean_= df.groupby(by='category').mean()
    max_= df.groupby(by='category').max()
    min_= df.groupby(by='category').min()
    d= pd.concat([mean_.price,max_.price,min_.price],axis=1)
    d.set_axis(['media', 'maximo', 'minimo'],axis='columns', inplace=True)
    return d

**manually check your answer**

In [61]:
d = create_df()
d

Unnamed: 0_level_0,price,category,margin
itemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
91912,86.55,1.0,10.3
1263,92.0,0.0,10.36
59689,111.81,1.0,8.61
68914,105.01,0.0,9.36
34046,103.8,0.0,9.72
14857,112.0,1.0,10.37
86905,107.48,2.0,9.47
55617,113.32,2.0,11.08
60288,89.54,2.0,9.95
16776,100.29,2.0,9.19


In [62]:
nf = get_stats(d)
nf

Unnamed: 0_level_0,media,maximo,minimo
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,100.27,105.01,92.0
1.0,103.453333,112.0,86.55
2.0,102.6575,113.32,89.54


**Check that your implementation is correct comparing with the example shown in the problem statement:**

In [63]:
array = np.array([[93.85, 1.0, 10.64],
                 [91.72, 1.0, 9.76],
                 [100.16, 1.0, 10.67],
                 [110.44, 2.0, 9.65],
                 [101.05, 1.0, 9.69],
                 [122.33, 0.0, 11.14],
                 [108.13, 2.0, 10.58],
                 [85.52, 2.0, 10.88],
                 [97.56, 0.0, 9.75],
                 [101.04, 2.0, 9.51]])
df_test = pd.DataFrame(array, columns = ['price', 'category', 'margin'])
df_test

Unnamed: 0,price,category,margin
0,93.85,1.0,10.64
1,91.72,1.0,9.76
2,100.16,1.0,10.67
3,110.44,2.0,9.65
4,101.05,1.0,9.69
5,122.33,0.0,11.14
6,108.13,2.0,10.58
7,85.52,2.0,10.88
8,97.56,0.0,9.75
9,101.04,2.0,9.51


In [64]:
get_stats(df_test)

Unnamed: 0_level_0,media,maximo,minimo
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,109.945,122.33,97.56
1.0,96.695,101.05,91.72
2.0,101.2825,110.44,85.52
