# [Pandas Basics II](https://data.compass.lighthouselabs.ca/28b18104-1a70-494d-928f-b39da3324ebd)

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Three random columns whose indexes only partly overlap: pandas aligns them
# on the union of the labels and fills the missing positions with NaN.
df = pd.DataFrame(
    {
        'one': pd.Series(np.random.randn(3), index=list('abc')),
        'two': pd.Series(np.random.randn(4), index=list('abcd')),
        'three': pd.Series(np.random.randn(3), index=list('bcd')),
    }
)

In [3]:
# Independent copy of df, used as the right-hand side of the comparisons below.
df2 = df.copy(deep=True)

In [4]:
# Element-wise "greater than"; a frame is never greater than its own copy.
df.gt(df2, axis='columns')

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [5]:
# Element-wise "not equal"; the NaN cells come out True because NaN != NaN.
df2.ne(df, axis='columns')

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,False,False


In [6]:
# Per column: is every value strictly positive? (NaN > 0 evaluates to False.)
df.gt(0).all(axis=0)

one      False
two      False
three    False
dtype: bool

In [7]:
# Per column: is at least one value strictly positive?
df.gt(0).any(axis=0)

one      True
two      True
three    True
dtype: bool

In [8]:
# Series.bool() is deprecated since pandas 2.1 and removed in pandas 3.0.
# .item() returns the only element of a one-element Series as a Python scalar.
pd.Series([True]).item()


True

In [9]:
# Series.bool() is deprecated since pandas 2.1 and removed in pandas 3.0.
# .item() returns the only element of a one-element Series as a Python scalar.
pd.Series([False]).item()

False

In [10]:
## Element-wise comparison needs a scalar or an array-like of the same length.
## Here a scalar is compared against every element.
pd.Series(['foo', 'bar', 'baz']).eq('foo')

0     True
1    False
2    False
dtype: bool

In [11]:
# Equal-length array-likes are compared position by position.
pd.Series(['foo', 'bar', 'baz']).eq(pd.Index(['foo', 'bar', 'qux']))

0     True
1     True
2    False
dtype: bool

In [12]:
# Collapse the element-wise check to one boolean: columns first, then overall.
# The result is False because the NaN cells never compare equal.
df.add(df).eq(df.mul(2)).all().all()

False

In [13]:
# Same check reduced per column: only a column without NaN can be all True.
df.add(df).eq(df.mul(2)).all(axis=0)

one      False
two       True
three    False
dtype: bool

In [14]:
# Unreduced element-wise result: False marks exactly the NaN positions.
df.add(df).eq(df.mul(2))

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [15]:
# NaN does not compare equal to itself, which is why the element-wise ==
# check above reports False wherever the frame holds NaN.
np.nan == np.nan

False

In [16]:
# DataFrame.equals treats NaNs in matching positions as equal, so use it
# rather than == to test whether two frames hold the same data.
df.add(df).equals(df.mul(2))

True

## Descriptive Stats

In [18]:
## Reduce down the rows (axis=0): one mean per column, NaN skipped.
df.mean(axis=0)

one      0.128829
two      0.852517
three    0.326001
dtype: float64

In [19]:
# Reduce across the columns (axis=1): one mean per row label, NaN skipped.
df.mean(axis=1)

a   -0.427198
b    0.377278
c    0.882669
d    0.924556
dtype: float64

In [20]:
# Standardise each column (subtract its mean, divide by its standard
# deviation); every column then has a standard deviation of 1.
ts_stand = df.sub(df.mean()).div(df.std())
ts_stand.std()

one      1.0
two      1.0
three    1.0
dtype: float64

In [21]:
## describe() summarises a Series or DataFrame (count, mean, std, quartiles...)

series = pd.Series(np.random.randn(1000))
# Blank out every second value; describe() leaves NaN out, so count is 500.
series.iloc[::2] = np.nan
series.describe()


count    500.000000
mean       0.030453
std        1.002658
min       -2.961190
25%       -0.592659
50%        0.026326
75%        0.715344
max        3.062229
dtype: float64

In [25]:
# 1000 x 5 frame of standard-normal samples with columns 'a' through 'e'.
frame = pd.DataFrame(
    data=np.random.randn(1000, 5),
    columns=list('abcde'),
)

In [27]:
# Set every second row to NaN across all columns; describe() then counts
# only the remaining 500 rows per column.
frame.iloc[::2, :] = np.nan
frame.describe()

Unnamed: 0,a,b,c,d,e
count,500.0,500.0,500.0,500.0,500.0
mean,0.065799,0.052849,-0.025956,0.020062,0.027874
std,1.028753,1.005423,0.969658,1.007095,1.051768
min,-3.054546,-3.674618,-3.045613,-2.685497,-3.34945
25%,-0.595017,-0.546107,-0.682346,-0.632873,-0.703626
50%,0.101579,0.039178,-0.04534,0.011542,0.050584
75%,0.767548,0.689482,0.594869,0.695607,0.73708
max,3.149203,3.207696,2.686458,3.665497,3.919239


In [28]:
# For non-numeric data describe() reports count, unique, top and freq;
# the NaN entry is not counted.
s = pd.Series(
    data=['a', 'a', 'b', 'b', 'a', 'a', np.nan, 'c', 'd', 'a'],
)
s.describe()

count     9
unique    4
top       a
freq      5
dtype: object

In [29]:
# Five random values to demonstrate idxmin / idxmax on a Series.
s1 = pd.Series(data=np.random.randn(5))
s1

0    0.366806
1   -1.923012
2    1.480246
3   -0.302349
4    1.661057
dtype: float64

In [30]:
# Index labels (not positions) of the smallest and largest value.
(s1.idxmin(skipna=True), s1.idxmax(skipna=True))

(1, 4)

In [31]:
# 5 x 3 frame of random values with columns 'A', 'B', 'C'.
df1 = pd.DataFrame(data=np.random.randn(5, 3), columns=list('ABC'))

In [32]:
# Display the whole 5 x 3 frame so the idxmin result can be checked by eye.
df1

Unnamed: 0,A,B,C
0,-0.364403,-0.813078,-1.273369
1,0.715642,-2.685777,-0.234436
2,0.249721,-1.555832,0.149391
3,0.068914,-0.951668,0.068858
4,2.25125,-1.046774,-0.350851


In [34]:
# For each column, the row label at which the minimum occurs.
df1.idxmin(axis='index')

A    0
B    1
C    0
dtype: int64

In [35]:
# For each row of df (the frame with columns one/two/three, not df1),
# the column label that holds the row maximum.
df.idxmax(axis='columns')

a    one
b    two
c    two
d    two
dtype: object

In [36]:
# Rebind df to a small two-column frame for the iteration examples.
df = pd.DataFrame(
    data={'col1': np.random.randn(3), 'col2': np.random.randn(3)},
    index=list('abc'),
)

In [37]:
# Iterating a DataFrame yields its column labels.
for col in df.columns:
    print(col)

col1
col2


In [38]:
## Three iteration helpers: items() walks the columns as (label, Series)
## pairs, while iterrows() and itertuples() walk the rows.
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']})
for label, ser in df.items():
    print(label, ser, sep='\n')

a
0    1
1    2
2    3
Name: a, dtype: int64
b
0    a
1    b
2    c
Name: b, dtype: object


In [40]:
# iterrows() yields (index label, row as Series); a row that mixes ints and
# strings is shown with dtype object.
for row_index, row in df.iterrows():
    print(row_index)
    print(row)

0
a    1
b    a
Name: 0, dtype: object
1
a    2
b    b
Name: 1, dtype: object
2
a    3
b    c
Name: 2, dtype: object


In [41]:
# itertuples() yields one namedtuple per row, with the index label first.
for row in df.itertuples(index=True, name='Pandas'):
    print(row)

Pandas(Index=0, a=1, b='a')
Pandas(Index=1, a=2, b='b')
Pandas(Index=2, a=3, b='c')
