# Pandas DataFrame overview
# A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.). The DataFrame has both a row index and a column index.



In [1]:
import pandas as pd

# Two integer Series built without an explicit index, so both are labelled
# with the default positions 0..3.
apples = pd.Series([3, 2, 0, 1])
oranges = pd.Series([3, 4, 7, 8])

# Each dict key becomes a column label of the resulting DataFrame.
data = dict(apples=apples, oranges=oranges)
fruits_df = pd.DataFrame(data)
print(fruits_df)

   apples  oranges
0       3        3
1       2        4
2       0        7
3       1        8


# Keep in mind: the index matters. Series are aligned on their index labels when they are combined into a DataFrame

In [2]:
import pandas as pd

# The two Series hold the same values but carry completely different labels.
apples = pd.Series([3, 2, 0, 1], index=list("abcd"))
oranges = pd.Series([3, 2, 0, 1], index=["mon", "tue", "wed", "thr"])

data = dict(apples=apples, oranges=oranges)
fruits_df = pd.DataFrame(data)

# No label is shared between the two indexes, so the frame contains the union
# of both label sets and every cell without a matching label is NaN.
print(fruits_df)

     apples  oranges
a       3.0      NaN
b       2.0      NaN
c       0.0      NaN
d       1.0      NaN
mon     NaN      3.0
thr     NaN      1.0
tue     NaN      2.0
wed     NaN      0.0


In [3]:
import pandas as pd

# Both Series now use the same labels, so they line up row for row.
day_labels = ["mon", "tue", "wed", "thr"]
apples = pd.Series([3, 2, 0, 1], index=day_labels)
oranges = pd.Series([3, 2, 0, 1], index=day_labels)

data = dict(apples=apples, oranges=oranges)
fruits_df = pd.DataFrame(data)

# Every label matches, so no NaN appears in the printed frame.
print(fruits_df)

     apples  oranges
mon       3        3
tue       2        2
wed       0        0
thr       1        1


In [4]:
# Column data as a dict of equal-length lists: three Ohio rows followed by
# three Nevada rows.
state = ["Ohio"] * 3 + ["Nevada"] * 3
data = {
    "state": state,
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

# `index` supplies one row label per record instead of the default 0..5.
row_labels = ["1st", "2nd", "3rd", "4th", "5th", "6th"]
state_pop_df = pd.DataFrame(data, index=row_labels)
print(state_pop_df)

      state  year  pop
1st    Ohio  2000  1.5
2nd    Ohio  2001  1.7
3rd    Ohio  2002  3.6
4th  Nevada  2001  2.4
5th  Nevada  2002  2.9
6th  Nevada  2003  3.2


In [5]:
# Rebuild the frame from the same dict, this time fixing the column order
# through `columns`. No `index` is passed here, so the rows are labelled with
# the default integer positions.
column_order = ["year", "state", "pop"]
state_pop_df = pd.DataFrame(data, columns=column_order)
state_pop_df.head()

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [6]:
# 'debt' is requested as a column but is not a key of `data`, so pandas
# creates it and fills it with missing values.
frame2 = pd.DataFrame(
    data,
    index=["one", "two", "three", "four", "five", "six"],
    columns=["year", "state", "pop", "debt"],
)
frame2.head()

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


In [7]:
# Show the column labels and then the row labels, one Index per line.
print(frame2.columns, frame2.index, sep="\n")

Index(['year', 'state', 'pop', 'debt'], dtype='object')
Index(['one', 'two', 'three', 'four', 'five', 'six'], dtype='object')


# A column in a DataFrame can be retrieved as a Series either by dict-like notation or by attribute access:

In [8]:
# Dictionary-like (bracket) notation: indexing the DataFrame with a column
# label extracts that column as a Series. The Series keeps the frame's row
# labels as its index and takes the column label as its name.
frame2["state"]

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [9]:

# Attribute-style access is the second way to read a column: the column label
# is used as an attribute name on the DataFrame and yields the same Series.
frame2.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [10]:

# .loc selects a row by its index label; the row comes back as a Series whose
# index is the frame's column labels and whose name is the row label.
print(frame2.loc["two"])

# Only the last expression of a cell is rendered, so the frame is shown once
# here. An earlier bare `state_pop_df.head()` was removed because its result
# was discarded and never appeared in the output.
frame2.head()

year     2001
state    Ohio
pop       1.7
debt      NaN
Name: two, dtype: object


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


# Columns can be modified by assignment. For example, the empty 'debt' column could be assigned a scalar value or an array of values:

In [11]:
# Assigning a scalar to a column broadcasts that value to every row, which
# replaces the missing values the 'debt' column held so far.
frame2['debt'] = 16.5
frame2.head()

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,16.5
two,2001,Ohio,1.7,16.5
three,2002,Ohio,3.6,16.5
four,2001,Nevada,2.4,16.5
five,2002,Nevada,2.9,16.5


In [12]:
# important cell for codes review
import numpy as np
import pandas as pd
# Rebuild the input dict and the frame inside this cell so the example below
# starts from a fresh `frame2`.
state = ["Ohio"] * 3 + ["Nevada"] * 3
data = {
    "state": state,
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

# 'debt' has no matching key in `data`; it is filled in further down.
frame2 = pd.DataFrame(
    data,
    index=["one", "two", "three", "four", "five", "six"],
    columns=["year", "state", "pop", "debt"],
)

ln = len(frame2)     # number of rows in the frame
rng = np.arange(ln)  # integers 0 .. ln-1, one per row
print(rng)

# `rng` was sized with len(frame2), so it supplies exactly one value per row.
frame2["debt"] = rng
frame2

[0 1 2 3 4 5]


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Nevada,2.4,3
five,2002,Nevada,2.9,4
six,2003,Nevada,3.2,5
