In [2]:
import numpy as np
import pandas as pd

### This notebook contains
- Basic functionality of Series and DataFrame
- Descriptive statistics
- Function application

#### Basic Functionality of Series-

1. axes: returns a list containing the row axis labels (the index) of the series
2. empty: returns the Boolean value saying whether the Object is empty or not. True indicates that the object is empty.
3. ndim: returns the number of dimensions of the object. By definition, a Series is a 1D data structure, so it returns 1
4. size: returns the size/length of the series
5. values: returns the actual data in the series as a NumPy array (ndarray)
6. head(): returns the sample of the data specified, from the top
7. tail(): returns the sample of the data specified, from the bottom

In [4]:
# Build a 5-element Series of draws from the standard normal distribution.
# No seed is set, so the values differ from run to run.
series = pd.Series(data=np.random.randn(5))
series

0   -0.389239
1    1.687979
2   -0.106152
3   -2.283793
4   -0.893092
dtype: float64

In [5]:
series.axes

[RangeIndex(start=0, stop=5, step=1)]

In [10]:
series.empty

False

In [11]:
series.ndim

1

In [12]:
series.size

5

In [14]:
series.values

array([-0.38923896,  1.68797903, -0.10615156, -2.2837931 , -0.89309191])

In [17]:
series.head(2)

0   -0.389239
1    1.687979
dtype: float64

In [21]:
series.tail(2)

3   -2.283793
4   -0.893092
dtype: float64

#### Basic Functionalities of DataFrame-

1. T(Transpose): Returns the transpose of the DataFrame. The rows and columns will interchange.
2. axes
3. dtypes
4. empty
5. ndim
6. shape
7. size
8. values
9. head()
10. tail()

In [3]:
# Assemble a small DataFrame from a mapping of column name -> Series.
# The keyword order below fixes the column order: Name, Age, Rating.

data = dict(
    Name=pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    Age=pd.Series([25, 26, 25, 23, 30, 29, 23]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
)

df = pd.DataFrame(data)
print(df)

    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   29    4.60
6   Jack   23    3.80


In [28]:
df.T

Unnamed: 0,0,1,2,3,4,5,6
Name,Tom,James,Ricky,Vin,Steve,Smith,Jack
Age,25,26,25,23,30,29,23
Rating,4.23,3.24,3.98,2.56,3.2,4.6,3.8


In [30]:
df.axes

[RangeIndex(start=0, stop=7, step=1),
 Index(['Name', 'Age', 'Rating'], dtype='object')]

In [4]:
df['Rating'].dtypes

dtype('float64')

In [35]:
df.empty

False

In [36]:
df.ndim

2

In [37]:
df.shape

(7, 3)

In [41]:
df.size

21

In [43]:
df.values

array([['Tom', 25, 4.23],
       ['James', 26, 3.24],
       ['Ricky', 25, 3.98],
       ['Vin', 23, 2.56],
       ['Steve', 30, 3.2],
       ['Smith', 29, 4.6],
       ['Jack', 23, 3.8]], dtype=object)

In [44]:
df.head(2)

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24


In [46]:
df.tail(2)

Unnamed: 0,Name,Age,Rating
5,Smith,29,4.6
6,Jack,23,3.8


#### Descriptive Statistics

In [49]:
# Build a dictionary of Series: one entry per column, twelve values each
d = dict(
    Name=pd.Series([
        'Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack',
        'Lee', 'David', 'Gasper', 'Betina', 'Andres',
    ]),
    Age=pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]),
)

# Turn the dictionary into a DataFrame (the keys become the column names)
df = pd.DataFrame(d)
print(df)

      Name  Age  Rating
0      Tom   25    4.23
1    James   26    3.24
2    Ricky   25    3.98
3      Vin   23    2.56
4    Steve   30    3.20
5    Smith   29    4.60
6     Jack   23    3.80
7      Lee   34    3.78
8    David   40    2.98
9   Gasper   30    4.80
10  Betina   51    4.10
11  Andres   46    3.65


#### 1. sum():
Returns the sum of the values for the requested axis. By default, axis is index (axis=0)

In [50]:
df.sum()   # each column is summed independently; the string column is concatenated

Name      TomJamesRickyVinSteveSmithJackLeeDavidGasperBe...
Age                                                     382
Rating                                                44.92
dtype: object

In [51]:
# axis = 1: add up the values across each row.
# numeric_only=True keeps the string column 'Name' out of the row totals;
# with the pandas >= 2.0 default (numeric_only=False) the sum would try to
# add str and numbers together and fail.
df.sum(axis=1, numeric_only=True)

0     29.23
1     29.24
2     28.98
3     25.56
4     33.20
5     33.60
6     26.80
7     37.78
8     42.98
9     34.80
10    55.10
11    49.65
dtype: float64

#### 2. mean():
Returns the average value

In [52]:
# Restrict the mean to numeric columns: 'Name' holds strings and has no mean.
# pandas >= 2.0 no longer drops such columns silently, so ask for it explicitly.
df.mean(numeric_only=True)

Age       31.833333
Rating     3.743333
dtype: float64

#### 3. std():
Returns the standard deviation of the values (the sample standard deviation, ddof=1, by default)

In [53]:
# Restrict the standard deviation to numeric columns: 'Name' holds strings.
# pandas >= 2.0 no longer drops such columns silently, so ask for it explicitly.
df.std(numeric_only=True)

Age       9.232682
Rating    0.661628
dtype: float64

In [64]:
df.count()   # each of the columns Name, Age and Rating has 12 entries

Name      12
Age       12
Rating    12
dtype: int64

In [76]:
df.count(axis = 1)  # each row (0, 1, 2, ...) has 3 entries, one per column

0     3
1     3
2     3
3     3
4     3
5     3
6     3
7     3
8     3
9     3
10    3
11    3
dtype: int64

In [65]:
# Restrict the median to numeric columns: 'Name' holds strings.
# pandas >= 2.0 no longer drops such columns silently, so ask for it explicitly.
df.median(numeric_only=True)

Age       29.50
Rating     3.79
dtype: float64

In [72]:
df.min()

Name      Andres
Age           23
Rating      2.56
dtype: object

In [73]:
df.max()

Name      Vin
Age        51
Rating    4.8
dtype: object

#### describe():
The describe() function computes a summary of statistics pertaining to the DataFrame columns.

In [77]:
df.describe()  # by default the summary covers only the numeric columns (Age, Rating)

Unnamed: 0,Age,Rating
count,12.0,12.0
mean,31.833333,3.743333
std,9.232682,0.661628
min,23.0,2.56
25%,25.0,3.23
50%,29.5,3.79
75%,35.5,4.1325
max,51.0,4.8


In [84]:
df.describe(include = ['object'])

Unnamed: 0,Name
count,12
unique,12
top,Tom
freq,1


In [83]:
df.describe(include = 'all')

Unnamed: 0,Name,Age,Rating
count,12,12.0,12.0
unique,12,,
top,Tom,,
freq,1,,
mean,,31.833333,3.743333
std,,9.232682,0.661628
min,,23.0,2.56
25%,,25.0,3.23
50%,,29.5,3.79
75%,,35.5,4.1325
max,,51.0,4.8


#### Function applications

- Table wise Function Application: pipe()
- Row or Column Wise Function Application: apply()
- Element wise Function Application: applymap() (renamed to DataFrame.map() in pandas 2.1, where applymap() is deprecated)

#### Table wise Function Application: pipe()

In [85]:
def adder(ele1, ele2):
    """Return ``ele1 + ele2``.

    The ``+`` operator is applied as-is, so the result is whatever the
    operands define for addition (numeric sum, element-wise DataFrame
    addition, sequence concatenation, ...).
    """
    total = ele1 + ele2
    return total

# 5x3 frame of standard-normal draws; no seed is set, so values vary per run
df = pd.DataFrame(
    data=np.random.randn(5, 3),
    columns=['col1', 'col2', 'col3'],
)
print(df)

       col1      col2      col3
0  1.248952  0.937846 -0.098653
1 -1.007469 -0.646318  2.645139
2  0.231781 -1.039579  0.167962
3  0.651182  1.407755  2.155091
4 -0.026948  0.147184  0.225782


In [91]:
df.pipe(adder,2)

Unnamed: 0,col1,col2,col3
0,3.248952,2.937846,1.901347
1,0.992531,1.353682,4.645139
2,2.231781,0.960421,2.167962
3,2.651182,3.407755,4.155091
4,1.973052,2.147184,2.225782


In [95]:
def mult(ele1, ele2):
    """Return ``ele1 * ele2``.

    The ``*`` operator is applied as-is, so the result is whatever the
    operands define for multiplication (numeric product, element-wise
    DataFrame scaling, sequence repetition, ...).
    """
    product = ele1 * ele2
    return product

# Small integer frame: four rows, columns 'one' and 'two'
ds = pd.DataFrame(
    data=[[1, 2], [2, 3], [3, 4], [4, 5]],
    columns=['one', 'two'],
)
print(ds)
    

   one  two
0    1    2
1    2    3
2    3    4
3    4    5


In [96]:
ds.pipe(mult,2)

Unnamed: 0,one,two
0,2,4
1,4,6
2,6,8
3,8,10


#### Row or Column Wise Function Application: apply()

In [99]:
print(ds)

   one  two
0    1    2
1    2    3
2    3    4
3    4    5


In [100]:
# By default, the operation performs 'column' wise, taking each column as an array-like.
ds.apply(np.mean)  

one    2.5
two    3.5
dtype: float64

In [102]:
# mean by row wise
ds.apply(np.mean, axis = 1)

0    1.5
1    2.5
2    3.5
3    4.5
dtype: float64

In [107]:
ds['one'].apply(lambda x: x - 1)

0    0
1    1
2    2
3    3
Name: one, dtype: int64

#### Element Wise Function Application: applymap()

In [119]:
# Give this section its own frame: `dg` is a copy of `ds`, so the
# element-wise examples that follow run on a fresh kernel.
dg = ds.copy()
print(dg)

   one  two
0    1    2
1    2    3
2    3    4
3    4    5


In [124]:
dg.applymap(lambda x: x * 4)

Unnamed: 0,one,two
0,4,8
1,8,12
2,12,16
3,16,20


In [126]:
dg['one'].map(lambda x: x * 3)

0     3
1     6
2     9
3    12
Name: one, dtype: int64