In [21]:
#numpy and pandas

import numpy as np
import pandas as pd

from pandas import Series, DataFrame


In [4]:
# Build a Series from a plain Python list.
s1 = Series(data=[1, 2, 3, 4, 5])

# Rendered as two columns: the index on the left, the values on the right.
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [5]:
# .values exposes the Series' data as a NumPy array
s1.values

array([1, 2, 3, 4, 5], dtype=int64)

In [6]:
# .index exposes the row labels; s1 was built without an explicit index, so it is a RangeIndex
s1.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
# A Series can carry custom labels: supply them through the `index` argument.
s2 = Series(
    data=[20, 25, 26, 12],
    index=['apple', 'orange', 'banana', 'grape'],
)
s2

apple     20
orange    25
banana    26
grape     12
dtype: int64

In [8]:
# Look up a single value by its index label, dictionary-style
s2['orange']

25

In [9]:
# Boolean filtering: keep only the entries whose value is greater than 20
s2[s2>20]

orange    25
banana    26
dtype: int64

In [11]:
# A Series behaves much like a dictionary (label -> value), so it converts to one directly
dict_from_series = s2.to_dict()
dict_from_series

{'apple': 20, 'orange': 25, 'banana': 26, 'grape': 12}

In [13]:
# ...and the other way round: build a Series from a dictionary
series_from_dict = Series(dict_from_series)
series_from_dict

# The displayed result has the same labels and values as the original s2

apple     20
orange    25
banana    26
grape     12
dtype: int64

In [None]:
# Important notes
# NOTE(review): this cell has no execution count, and neither call below is the
# cell's last expression, so their results are computed but never displayed.
pd.isnull(s1) # boolean check for null (missing) values in the Series
pd.notnull(s1) # the opposite boolean check: flags the values that are NOT null

s2.name = "Fruit Market" # gives the Series itself a name
s2.index.name = "Fruits" # gives the index (the label column) a name

In [2]:
# Sorting demo: start from a Series whose labels are deliberately out of order.
s = Series(data=range(3), index=list('CAB'))
s

C    0
A    1
B    2
dtype: int64

In [3]:
# sort_index() orders the entries by their labels (A, B, C)
s.sort_index()

A    1
B    2
C    0
dtype: int64

In [5]:
# sort_values() orders the entries by value instead of by label
s.sort_values()

C    0
A    1
B    2
dtype: int64

In [6]:
# Ranking demo: build a Series of 10 random draws to rank in the next cell.

from numpy.random import randn

# Seed NumPy's global generator so "Restart & Run All" reproduces the same
# sample (and therefore the same ranks) on every run.
# NOTE: outputs saved before the seed was added came from an unseeded draw and
# will differ from a fresh run.
np.random.seed(42)
ss = Series(randn(10))

ss

0   -1.488502
1    0.904847
2    0.872317
3   -0.508269
4   -0.314336
5    1.101294
6   -1.475461
7    0.676029
8   -0.574518
9    0.448070
dtype: float64

In [7]:
# rank() gives each value's position in ascending order (1.0 = smallest value)
ss.rank()

0     1.0
1     9.0
2     8.0
3     4.0
4     5.0
5    10.0
6     2.0
7     7.0
8     3.0
9     6.0
dtype: float64

In [8]:
#In the left column we see the original index value and on the right we see its rank!

In [9]:
# Arithmetic demo: the first of two Series.
ser1 = Series(data=range(3), index=list('ABC'))

# Display it
ser1

A    0
B    1
C    2
dtype: int64

In [10]:
# The second Series: labels A-C plus one extra label, 'D'.
ser2 = Series(data=[3, 4, 5, 6], index=list('ABCD'))

# Display it
ser2

A    3
B    4
C    5
D    6
dtype: int64

In [11]:
# Adding two Series matches values up by label; 'D' exists only in ser2, so its
# sum comes out as NaN and the result is float64
ser1 + ser2

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

In [12]:
#Note: a label that is missing from one of the Series (here 'D') gets NaN filled in automatically

In [23]:
# Concatenating Series
# Warm-up: see how concatenation works in NumPy first.

# A 3x3 matrix holding the integers 0..8
arr = np.arange(9).reshape(3, 3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [24]:
# Concatenate along axis 1: the two copies are joined side by side, so the
# result keeps 3 rows and doubles the columns (3x6).
# `arr` is the 3x3 matrix built earlier in the notebook; the previously used
# name `arr1` is never defined and raises NameError on a fresh kernel.
np.concatenate([arr, arr], axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [25]:
# Concatenate along axis 0: the two copies are stacked vertically, so the
# result doubles the rows and keeps 3 columns (6x3).
# `arr` is the 3x3 matrix built earlier in the notebook; the previously used
# name `arr1` is never defined and raises NameError on a fresh kernel.
np.concatenate([arr, arr], axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [27]:
# The same idea with Series.
# Two Series whose index labels do not overlap
s1 = Series(data=range(3), index=list('TUV'))
s2 = Series(data=[3, 4], index=list('XY'))

# concat stacks them end to end (axis=0 is the default)
pd.concat(objs=[s1, s2])

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [28]:
# Concatenating along axis=1 instead yields a DataFrame: one column per Series,
# one row per label, with NaN wherever a Series has no value for that label
pd.concat([s1,s2],axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [36]:
# reindex() picks out just the row labels we ask for from the concatenated DataFrame
# NOTE(review): this rebinds `ss`, which earlier held the Series of random numbers
ss = pd.concat([s1,s2],axis=1)
ss.reindex(['U','V','Y'])

Unnamed: 0,0,1
U,1.0,
V,2.0,
Y,,4.0


In [38]:
# Suppose we want to tag each input of the concatenation with a marker (a key)

# We can do this with `keys`, which produces a hierarchical index
pd.concat([s1,s2],keys=['ctg1','ctg2'])

ctg1  T    0
      U    1
      V    2
ctg2  X    3
      Y    4
dtype: int64