In [1]:
import numpy as np
import pandas as pd

A DataFrame is the two-dimensional analogue of a Series.

Think of it as an ordered collection of columns that share one row index - each column is a Series, so a DataFrame behaves like a dict of Series.

Access a column (a Series) using a column label, and an element of that Series using a row label.

In [5]:
## cosine Series: keys are str(angle), values are cos(angle), for 5 angles on [0, pi]
cos_s = pd.Series(
    dict((str(theta), np.cos(theta)) for theta in np.linspace(0, np.pi, 5))
)
## sine Series over the same 5 angles, labelled the same way
sin_s = pd.Series(
    dict((str(theta), np.sin(theta)) for theta in np.linspace(0, np.pi, 5))
)

## assemble the DataFrame column by column: dict key -> column name, Series -> column
df = pd.DataFrame(dict(cos=cos_s, sin=sin_s))
print(df)

                             cos           sin
0.0                 1.000000e+00  0.000000e+00
0.7853981633974483  7.071068e-01  7.071068e-01
1.5707963267948966  6.123234e-17  1.000000e+00
2.356194490192345  -7.071068e-01  7.071068e-01
3.141592653589793  -1.000000e+00  1.224647e-16


In [3]:
# a few of the DataFrame properties...
# .values exposes the data as a 2-D NumPy array (rows x columns); the row and
# column labels are not part of it
print(df.values)

[[ 1.00000000e+00  0.00000000e+00]
 [ 7.07106781e-01  7.07106781e-01]
 [ 6.12323400e-17  1.00000000e+00]
 [-7.07106781e-01  7.07106781e-01]
 [-1.00000000e+00  1.22464680e-16]]


In [4]:
# .index holds the row labels; here they are the string keys the Series were built with
print(df.index)

Index(['0.0', '0.7853981633974483', '1.5707963267948966', '2.356194490192345',
       '3.141592653589793'],
      dtype='object')


In [6]:
# .columns holds the column labels, i.e. the keys of the dict passed to pd.DataFrame
print(df.columns)

Index(['cos', 'sin'], dtype='object')


In [7]:
# Plain [] indexing on a DataFrame looks up COLUMN labels, so the row label
# '0.0' is not found and a KeyError is raised.  The error is the point of this
# cell, so catch it and show it rather than stopping a Run All.
try:
    print(df['0.0'])
except KeyError as err:
    print("KeyError:", err)

KeyError: '0.0'

In [8]:
# dict-style [] indexing with a column label returns that column as a Series
print(df['cos'])

0.0                   1.000000e+00
0.7853981633974483    7.071068e-01
1.5707963267948966    6.123234e-17
2.356194490192345    -7.071068e-01
3.141592653589793    -1.000000e+00
Name: cos, dtype: float64


In [21]:
# .loc selects a row by its label; the result is a Series indexed by the column names
print(df.loc['0.0'])

cos    1.0
sin    0.0
Name: 0.0, dtype: float64


In [22]:
# .iloc selects a row by integer position; position 0 is the row labelled '0.0'
print(df.iloc[0])

cos    1.0
sin    0.0
Name: 0.0, dtype: float64


In [19]:
# Build a DataFrame by passing the data, the row labels and the column labels
# explicitly.  The data come from np.random.rand and no seed is set in this
# cell, so the printed values differ from run to run.
df2 = pd.DataFrame(
    data=np.random.rand(4, 3),
    index=['r1', 'r2', 'r3', 'r4'],
    columns=['c1', 'c2', 'c3'],
)
print(df2)

          c1        c2        c3
r1  0.098307  0.645871  0.545461
r2  0.057909  0.804760  0.435866
r3  0.673927  0.271851  0.519343
r4  0.736589  0.551865  0.005450


In [24]:
# attribute-style column access: df2.c2 selects the same column as df2['c2']
print(df2.c2)

r1    0.645871
r2    0.804760
r3    0.271851
r4    0.551865
Name: c2, dtype: float64


In [25]:
# attribute-style access on df as well; same column as df['cos']
print(df.cos)

0.0                   1.000000e+00
0.7853981633974483    7.071068e-01
1.5707963267948966    6.123234e-17
2.356194490192345    -7.071068e-01
3.141592653589793    -1.000000e+00
Name: cos, dtype: float64


In [8]:
# add a derived column to df: the element-wise product of the 'cos' column with itself
df['cos_sq'] = df['cos'] * df['cos']
print(df)

                             cos           sin        cos_sq
0.0                 1.000000e+00  0.000000e+00  1.000000e+00
0.7853981633974483  7.071068e-01  7.071068e-01  5.000000e-01
1.5707963267948966  6.123234e-17  1.000000e+00  3.749399e-33
2.356194490192345  -7.071068e-01  7.071068e-01  5.000000e-01
3.141592653589793  -1.000000e+00  1.224647e-16  1.000000e+00


In [29]:
# transposing a DataFrame: row labels become column labels and vice versa
df2_T = df2.transpose()
# show the original first, then the transpose, one after the other
print(df2, df2_T, sep='\n')


          c1        c2        c3
r1  0.098307  0.645871  0.545461
r2  0.057909  0.804760  0.435866
r3  0.673927  0.271851  0.519343
r4  0.736589  0.551865  0.005450
          r1        r2        r3        r4
c1  0.098307  0.057909  0.673927  0.736589
c2  0.645871  0.804760  0.271851  0.551865
c3  0.545461  0.435866  0.519343  0.005450


In [31]:
# Attribute access only resolves COLUMN labels; 'r1' is a row label of df2, so
# this raises AttributeError.  The error is the point of this cell, so catch it
# and show it rather than stopping a Run All.
try:
    print(df2.r1)
except AttributeError as err:
    print("AttributeError:", err)

AttributeError: 'DataFrame' object has no attribute 'r1'

In [33]:
# .values is a plain NumPy array, so [0] indexes by position: the first row (r1)
print(df2.values[0])

[0.09830723 0.6458707  0.54546132]


In [38]:
# 7 evenly spaced sample points from 0 to pi, both endpoints included
print(np.linspace(start=0, stop=np.pi, num=7))

[0.         0.52359878 1.04719755 1.57079633 2.0943951  2.61799388
 3.14159265]


In [2]:
# Build a second DataFrame holding cos and sin at 7 angles on [0, pi] (df uses 5),
# so that it can be added to df: the column names match, but only some of the
# row labels are shared between the two frames.
cos_s7 = pd.Series(
    dict((str(theta), np.cos(theta)) for theta in np.linspace(0, np.pi, 7))
)
sin_s7 = pd.Series(
    dict((str(theta), np.sin(theta)) for theta in np.linspace(0, np.pi, 7))
)
df3 = pd.DataFrame(dict(cos=cos_s7, sin=sin_s7))
print(df3)

                             cos           sin
0.0                 1.000000e+00  0.000000e+00
0.5235987755982988  8.660254e-01  5.000000e-01
1.0471975511965976  5.000000e-01  8.660254e-01
1.5707963267948966  6.123234e-17  1.000000e+00
2.0943951023931953 -5.000000e-01  8.660254e-01
2.617993877991494  -8.660254e-01  5.000000e-01
3.141592653589793  -1.000000e+00  1.224647e-16


In [9]:
# show df again (it now carries the extra cos_sq column) for comparison with df3
print(df)

                             cos           sin        cos_sq
0.0                 1.000000e+00  0.000000e+00  1.000000e+00
0.7853981633974483  7.071068e-01  7.071068e-01  5.000000e-01
1.5707963267948966  6.123234e-17  1.000000e+00  3.749399e-33
2.356194490192345  -7.071068e-01  7.071068e-01  5.000000e-01
3.141592653589793  -1.000000e+00  1.224647e-16  1.000000e+00


In [10]:
# Element-wise addition aligns the two frames on BOTH row and column labels.
# Any label present in only one operand (e.g. the cos_sq column, or rows that
# appear in just one frame) yields NaN in the result.
df4 = df.add(df3)
print(df4)

                             cos  cos_sq           sin
0.0                 2.000000e+00     NaN  0.000000e+00
0.5235987755982988           NaN     NaN           NaN
0.7853981633974483           NaN     NaN           NaN
1.0471975511965976           NaN     NaN           NaN
1.5707963267948966  1.224647e-16     NaN  2.000000e+00
2.0943951023931953           NaN     NaN           NaN
2.356194490192345            NaN     NaN           NaN
2.617993877991494            NaN     NaN           NaN
3.141592653589793  -2.000000e+00     NaN  2.449294e-16


Accessing a column: either dict notation (df[colName]) or as an attribute (df.colName)
Accessing a row: df.loc[label] by row label, or df.iloc[position] by integer position