# A very short introduction to pandas

In [None]:
import pandas as pd
import numpy as np

## Introduction to Pandas Data Structures

### Declaring a Series 

In [None]:
# A Series is a one-dimensional labelled array (like a single data-frame column).
# No index is passed here, so pandas assigns the default integer index.
s = pd.Series([12, -4, 7, 9])
s

In [None]:
# Same values as before, but now each element gets an explicit string label.
s = pd.Series(data=[12, -4, 7, 9], index=list("abcd"))
s

In [None]:
# selecting the values (returned as a NumPy array)
s.values

In [None]:
# selecting the index (the labels of the elements)
s.index

In [None]:
# the data wrapped as a pandas ExtensionArray
s.array

### Selecting the Internal Elements

In [None]:
# select by integer position: the third element
s.iloc[2]

In [None]:
# select by index label
s["b"]

In [None]:
# Positional slice of the first two elements. Using .iloc makes it explicit
# that 0:2 are positions, not labels (s carries string labels in this notebook).
s.iloc[0:2]

In [None]:
# select several elements at once by passing a list of labels
s[['b','c']]

### Assigning Values to the Elements

In [None]:
# assign by integer position: overwrite the second element
s.iloc[1] = 0
s

In [None]:
# assign by label: overwrite the element labelled 'b'
s['b'] = 1
s

### Defining Series from NumPy Arrays and Other Series

In [None]:
# A Series can be built directly from a NumPy array; the default integer
# index is used because none is supplied.
arr = np.asarray([1, 2, 3, 4])
s3 = pd.Series(data=arr)
s3

In [None]:
# build a Series from an existing Series (values and index are carried over)
s4 = pd.Series(s)
s4

In [None]:
# arithmetic between two Series is aligned on the index labels, not on position
s5 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
s4+s5

### Filtering Values 

In [None]:
# boolean mask: keep only the elements greater than 8
s[s > 8]

### Operations and Mathematical Functions

In [None]:
# arithmetic operators are applied element-wise
s / 2

In [None]:
# NumPy functions are applied element-wise and return a Series with the same index
np.log(s)

### Evaluating Values

In [None]:
# A Series whose index contains repeated labels ('white' and 'green' appear twice).
serd = pd.Series(
    data=[1, 0, 2, 1, 2, 3],
    index="white white blue green green yellow".split(),
)
serd

In [None]:
# the distinct values, in order of first appearance
serd.unique()

In [None]:
# how often each distinct value occurs
serd.value_counts()

In [None]:
# boolean Series: True where the value is 0 or 3
serd.isin([0,3])

In [None]:
# use the membership mask to keep only the elements equal to 0 or 3
serd[serd.isin([0,3])]

### NaN Values

In [None]:
# np.nan marks a missing value. Use the lowercase spelling: the np.NaN alias
# was removed in NumPy 2.0 and raises AttributeError there.
s2 = pd.Series([5, -3, np.nan, 14])
s2

In [None]:
# boolean Series: True where the value is missing (NaN)
s2.isnull()

In [None]:
# boolean Series: True where the value is present (not NaN)
s2.notnull()

In [None]:
# keep only the elements that have a value
s2[s2.notnull()]

In [None]:
# keep only the elements that are missing
s2[s2.isnull()]

## The DataFrame

### Defining a DataFrame

In [None]:
# Each keyword becomes a column; every list holds that column's values, one per row.
data = dict(
    farbe=['blau', 'gruen', 'gelb', 'rot', 'weiss'],
    objekt=['ball', 'stift', 'bleistift', 'papier', 'tasse'],
    preis=[1.2, 1.0, 0.6, 0.9, 1.7],
)
frame = pd.DataFrame(data=data)
frame

In [None]:
# columns= picks which keys of the dict become columns, and in which order
frame2 = pd.DataFrame(data, columns=['objekt', 'preis'])
frame2

In [None]:
# index= supplies custom row labels instead of the default integer index
# (this rebinds frame2)
frame2 = pd.DataFrame(data, index=['eins', 'zwei', 'drei', 'vier', 'fuenf'])
frame2

### Selecting Elements

In [None]:
# the column labels
frame.columns

In [None]:
# the row labels
frame.index

In [None]:
# the data as a two-dimensional NumPy array
frame.values

In [None]:
# select one column by its label; the result is a Series
frame['preis']

In [None]:
# attribute access to a column; only works when the column name is a valid
# Python identifier that does not clash with a DataFrame attribute
frame.preis

In [None]:
# select several columns by passing a list of labels; the result is a DataFrame
frame[["farbe","preis"]]

In [None]:
# boolean mask: keep only the rows whose 'preis' is greater than 1.0
frame[frame["preis"]>1.0]

In [None]:
# .loc selects by index label; with the default integer index the label 2
# is the third row. The row is returned as a Series.
a = frame.loc[2]
a

In [None]:
# .iloc selects by integer position: the third row
frame.iloc[2]

In [None]:
# select the rows at positions 2 and 4
frame.iloc[[2,4]]

In [None]:
# slicing with [] selects rows: here only the first row
frame[0:1]

In [None]:
# rows at positions 1 and 2 (the end of the slice is exclusive)
frame[1:3]

In [None]:
# Read a single cell with one .loc[row_label, column_label] lookup instead of
# chained indexing (frame['objekt'][3]), which does two separate selections.
frame.loc[3, 'objekt']

### Assigning a New Column

In [None]:
# assigning a scalar creates the column and fills every row with that value
frame['new'] = 12
frame

In [None]:
# assigning a list sets one value per row; its length must match the number of rows
frame['new'] = [3.0, 1.3, 2.2, 0.8, 1.1]
frame

In [None]:
# set a single cell by (row label, column label)
frame.loc[(2,'preis')] = 3.3
frame

In [None]:
# set a single cell by (row position, column position)
frame.iloc[(2,2)] = 0.6
frame

### Deleting a Column

In [None]:
# remove the column 'new'; inplace=True modifies frame itself instead of
# returning a new DataFrame
frame.drop(columns=['new'],inplace=True)
frame

## Function Application and Mapping

### Functions by Element

In [None]:
# A 4x4 frame holding the integers 0..15, filled row by row.
frame = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
# NumPy functions are applied to every element of the frame.
np.sqrt(frame)

In [None]:
# sum of each column
frame.sum()

In [None]:
# mean of each column
frame.mean()

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) for each numeric column
frame.describe()

## "Not a Number" Data

### Assigning a NaN Value

In [None]:
# np.nan marks the missing value for 'gruen'. Use the lowercase spelling: the
# np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
ser = pd.Series([0,1,2,np.nan,9], index=['rot','blau','gelb','gruen', 'weiss'])
ser

In [None]:
# assigning None also marks the element as missing
# (expected to show up as NaN because ser holds floats)
ser['weiss'] = None
ser

### Filtering Out NaN Values

In [None]:
# return a new Series without the missing values; ser itself is not changed
ser.dropna()

In [None]:
# the same filtering written as a boolean mask
ser[ser.notnull()]

In [None]:
# A small frame with missing values: the 'gruen' row and the 'mug' column
# consist of NaN only.
frame3 = pd.DataFrame(
    data=[
        [6, np.nan, 6],
        [np.nan, np.nan, np.nan],
        [2, np.nan, 5],
    ],
    index=['blau', 'gruen', 'rot'],
    columns=['ball', 'mug', 'pen'],
)
frame3

In [None]:
# drop every row that contains at least one NaN (the default, how='any')
frame3.dropna()

In [None]:
# drop only the rows in which every value is NaN
frame3.dropna(how='all')

### Filling in NaN Occurrences

In [None]:
# return a new frame in which every NaN is replaced by 0
frame3.fillna(0)

In [None]:
# a dict gives a separate fill value for each column
frame3.fillna({'ball':1, 'mug':0, 'pen': 99})