## Pandas
Install pandas if it is not already available in the kernel's environment:

`%pip install pandas`

In [1]:
# import libraries

import pandas as pd
import os

## DataFrame
You can create a DataFrame either from pre-defined values or by loading a file.

In [2]:
# Build a DataFrame from in-memory values: one record (dict) per person.
# Column order follows the key order of the records: name, gender, age.
df = pd.DataFrame(
    [
        {'name': "John", 'gender': "Male", 'age': 25},
        {'name': "Dave", 'gender': "Male", 'age': 29},
        {'name': "Steve", 'gender': "Male", 'age': 30},
        {'name': "Amy", 'gender': "Female", 'age': 31},
    ]
)

# A bare expression on the last line renders the frame as the cell output.
df

Unnamed: 0,name,gender,age
0,John,Male,25
1,Dave,Male,29
2,Steve,Male,30
3,Amy,Female,31


In [3]:
# Alternatively, load the DataFrame from a CSV file.
# The path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="data/people.csv")

# Show the loaded frame.
data

Unnamed: 0,name,gender,age
0,John,Male,25
1,Dave,Male,29
2,Steve,Male,30
3,Amy,Female,31
4,Sameer,Male,50
5,Suzan,Female,62
6,Jeff,Male,57
7,Asmel,Male,20
8,Boby,Male,42
9,Farr,Male,54


In [4]:
# Descriptive statistics for the hand-built frame `df`.
# include="all" reports text columns (count/unique/top/freq) alongside the
# numeric summary instead of restricting the report to numeric columns.
df.describe(include="all")

Unnamed: 0,name,gender,age
count,4,4,4.0
unique,4,2,
top,Dave,Male,
freq,1,3,
mean,,,28.75
std,,,2.629956
min,,,25.0
25%,,,28.0
50%,,,29.5
75%,,,30.25


In [5]:
# Keep only some of the columns: indexing with a list of column names
# returns a DataFrame holding just those columns, in the listed order.
subset = data[
    [
        'name',
        'age',
    ]
]

# Show the reduced frame.
subset

Unnamed: 0,name,age
0,John,25
1,Dave,29
2,Steve,30
3,Amy,31
4,Sameer,50
5,Suzan,62
6,Jeff,57
7,Asmel,20
8,Boby,42
9,Farr,54


In [6]:
# Group the rows by the 'gender' column and count the entries in every
# remaining column of each group.
groupby_df = data.groupby("gender").count()

# Show the per-group counts.
groupby_df

Unnamed: 0_level_0,name,age
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,4,4
Male,9,9


In [7]:
# Aggregation on a single column: the smallest value in 'age'.
data["age"].min()

20

In [8]:
# Largest value in the 'age' column.
data["age"].max()

62

In [9]:
# Arithmetic mean of the 'age' column.
data["age"].mean()

39.23076923076923

In [10]:
# Standard deviation of the 'age' column, using pandas' default settings.
data["age"].std()

13.311360099265128

In [11]:
# Multiple aggregation functions in one call, for all columns.
# 'mean' and 'std' are only defined for numeric data. Older pandas silently
# left NaN when they were requested for a text column; pandas 2.0+ raises
# instead, so each column is only given the functions its dtype supports.
all_funcs = ['sum', 'min', 'max', 'mean', 'std']
text_funcs = ['sum', 'min', 'max']  # 'sum' on text concatenates the strings
numeric_cols = set(data.select_dtypes(include='number').columns)

# Functions not requested for a column show up as NaN in the result;
# reindex keeps the rows in the order the functions are listed above.
data.agg(
    {col: all_funcs if col in numeric_cols else text_funcs for col in data.columns}
).reindex(all_funcs)

Unnamed: 0,name,gender,age
sum,JohnDaveSteveAmySameerSuzanJeffAsmelBobyFarrRo...,MaleMaleMaleFemaleMaleFemaleMaleMaleMaleMaleMa...,510.0
min,Amy,Female,20.0
max,Suzan,Male,62.0
mean,,,39.230769
std,,,13.31136


In [12]:
# Multiple aggregation functions in one call, for a single column.
# The result is a Series indexed by the function names.
data["age"].aggregate(["sum", "min", "max", "mean", "std"])

sum     510.000000
min      20.000000
max      62.000000
mean     39.230769
std      13.311360
Name: age, dtype: float64

In [13]:
# Export the name/age subset to <working directory>/out/subset.csv.
#
# The previous dirname(realpath("__file__")) passed the *string* "__file__",
# not the __file__ variable (which a notebook does not define), so it only
# ever resolved the current working directory. Say so explicitly.
path = os.path.realpath(os.getcwd())

# to_csv does not create missing directories, so make sure out/ exists first.
out_dir = os.path.join(path, "out")
os.makedirs(out_dir, exist_ok=True)

# The index is written as the first column (the pandas default).
subset.to_csv(os.path.join(out_dir, "subset.csv"))