# DataFrames_1

A DataFrame is a two-dimensional data structure in which data is aligned in rows and columns.

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
np.random.seed(101)  # Fix the seed so that the same random numbers are generated on every run

In [4]:
# Build a 5x4 frame of random draws: rows are labelled A-E, columns X, Y, Z, T.
df = pd.DataFrame(data=randn(5, 4), index=list("ABCDE"), columns=list("XYZT"))

In [5]:
df  # display the full frame: 5 labelled rows by 4 labelled columns

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Selection and Indexing

In [6]:
df['X']  # selecting one column label with single brackets returns that column as a Series

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: X, dtype: float64

In [7]:
type(df['X'])  # a single column is a pandas Series

pandas.core.series.Series

In [8]:
type(df)  # the whole table is a pandas DataFrame

pandas.core.frame.DataFrame

In [9]:
df[['X','Y']]  # passing a list of column labels returns a DataFrame with just those columns

Unnamed: 0,X,Y
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [10]:
# Create a new column "new" holding the element-wise (row-by-row) sum of columns X and Z.
# Assigning to a column label that does not exist yet adds it to df.

df["new"] = df["X"] + df["Z"]

In [11]:
df  # the frame now has the extra "new" column on the right

Unnamed: 0,X,Y,Z,T,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [12]:
# Dropping new column
# columns='new' is the keyword form of drop('new', axis=1).
# drop() returns a new DataFrame; df itself is left unchanged.

df.drop(columns='new')

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [13]:
df  # "new" is still here: drop() returned a copy and left df untouched, so next we pass inplace=True

Unnamed: 0,X,Y,Z,T,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [14]:
# Remove the column from df itself. With inplace=True nothing is returned,
# so this cell produces no output.
df.drop(columns='new', inplace=True)

In [15]:
df  # "new" is gone now that the drop was applied in place

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [16]:
# Dropping row
# index="E" is the keyword form of drop("E", axis=0).
# As with columns, this returns a copy without row "E"; df is not modified.

df.drop(index="E")

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [17]:
df  # row "E" is still present because the previous drop() was not done in place

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [18]:
df.shape  # (number of rows, number of columns)

(5, 4)

In [19]:
df[['T','X']]  # the result follows the order of the list, so T comes before X

Unnamed: 0,T,X
A,0.503826,2.70685
B,0.605965,0.651118
C,-0.589001,-2.018168
D,0.955057,0.188695
E,0.683509,0.190794


In [20]:
df.loc['C']  # label-based row selection; the row comes back as a Series indexed by column name

X   -2.018168
Y    0.740122
Z    0.528813
T   -0.589001
Name: C, dtype: float64

In [21]:
df.iloc[2]  # integer-position selection: position 2 is the third row, which is the one labelled "C"

X   -2.018168
Y    0.740122
Z    0.528813
T   -0.589001
Name: C, dtype: float64

In [22]:
df.loc['B','Y']  # a row label plus a column label returns the single value at that cell

-0.31931804459303326

In [23]:
df.loc[['A','B'],['X','Z']]  # lists of row labels and column labels return the matching sub-table

Unnamed: 0,X,Z
A,2.70685,0.907969
B,0.651118,-0.848077
