# DataFrame: a two-dimensional data structure

Two-dimensional, size-mutable, potentially heterogeneous tabular data.

The data structure also contains labeled axes (rows and columns). Arithmetic operations align on both row and column labels. A DataFrame can be thought of as a dict-like container for Series objects. It is the primary pandas data structure.

# Creating a DataFrame from a Python list

In [1]:
import pandas as pd

In [2]:
# The first seven Fibonacci numbers, used as a simple one-dimensional input.
data = [1, 1, 2, 3, 5, 8, 13]

In [10]:
df = pd.DataFrame(data)

In [11]:
df

Unnamed: 0,0
0,1
1,1
2,2
3,3
4,5
5,8
6,13


In [12]:
type(df)

pandas.core.frame.DataFrame

In [13]:
# Rebuild the frame, this time naming its single column 'fib'.
df = pd.DataFrame(data, columns=['fib'])

In [14]:
df

Unnamed: 0,fib
0,1
1,1
2,2
3,3
4,5
5,8
6,13


In [15]:
df['fib']

0     1
1     1
2     2
3     3
4     5
5     8
6    13
Name: fib, dtype: int64

In [16]:
type(df['fib'])

pandas.core.series.Series

# Creating a DataFrame from a list of lists

In [17]:
# Each inner list is one row: a name followed by a count.
data = [
    ['ram', 10],
    ['shyam', 11],
    ['rita', 22],
]

In [18]:
df = pd.DataFrame(data)

In [19]:
df

Unnamed: 0,0,1
0,ram,10
1,shyam,11
2,rita,22


In [20]:
# Build the frame again with explicit column labels instead of 0 and 1.
column_names = ['Name', 'no.of chocolate']
df = pd.DataFrame(data, columns=column_names)

In [21]:
df

Unnamed: 0,Name,no.of chocolate
0,ram,10
1,shyam,11
2,rita,22


In [23]:
df['Name']

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [24]:
type(df['Name'])

pandas.core.series.Series

In [25]:
type(df['no.of chocolate'])

pandas.core.series.Series

In [26]:
df['no.of chocolate']

0    10
1    11
2    22
Name: no.of chocolate, dtype: int64

In [27]:
# Get the column as a NumPy array. to_numpy() is the accessor pandas
# recommends over the older .values attribute.
df['no.of chocolate'].to_numpy()

array([10, 11, 22])

In [28]:
df

Unnamed: 0,Name,no.of chocolate
0,ram,10
1,shyam,11
2,rita,22


In [31]:
# A Series can be converted to a plain Python list directly; going through
# .values first is unnecessary.
df['Name'].tolist()

['ram', 'shyam', 'rita']

In [32]:
# Convert the whole frame to a 2-D NumPy array. to_numpy() is the accessor
# pandas recommends over the older .values attribute.
df.to_numpy()

array([['ram', 10],
       ['shyam', 11],
       ['rita', 22]], dtype=object)

In [33]:
# Select the first column by position, then convert only that column to a
# NumPy array, instead of converting the whole mixed-type frame and slicing it.
df.iloc[:, 0].to_numpy()

array(['ram', 'shyam', 'rita'], dtype=object)

# Accessing column values of a pandas DataFrame

In [34]:
# Start this section from a fresh two-column frame built from `data`.
column_names = ['Name', 'no.of chocolate']
df = pd.DataFrame(data, columns=column_names)

In [35]:
df

Unnamed: 0,Name,no.of chocolate
0,ram,10
1,shyam,11
2,rita,22


In [36]:
df['Name']

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [37]:
df.Name

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [38]:
df['no.of chocolate']

0    10
1    11
2    22
Name: no.of chocolate, dtype: int64

In [39]:
# Attribute-style access (df.<name>) only works when the column name is a valid
# Python identifier. 'no.of chocolate' contains a dot and a space, so writing
# `df.no.of chocolate` is a SyntaxError that stops the whole notebook at this
# cell. Compile the expression inside try/except to show the error without
# raising it; use df['no.of chocolate'] for such columns.
bad_expression = "df.no.of chocolate"
try:
    compile(bad_expression, "<cell>", "eval")
except SyntaxError as err:
    syntax_error_message = f"SyntaxError: {err.msg}"
else:
    syntax_error_message = "no error"
print(syntax_error_message)

SyntaxError: ignored

# Creating a DataFrame from a Python dictionary

In [46]:
# Column-oriented input: each key is a column label, each list holds that
# column's values. The gender codes are deliberately mixed-case.
student = dict(
    name=['Anish', 'Bhuwan', 'Dinesh', 'Sanjaya', 'Arjun', 'Krishna', 'Sita', 'Gita'],
    gender=['M', 'M', 'm', 'M', 'M', 'M', 'F', 'f'],
)

In [47]:
df = pd.DataFrame(student)

In [48]:
df

Unnamed: 0,name,gender
0,Anish,M
1,Bhuwan,M
2,Dinesh,m
3,Sanjaya,M
4,Arjun,M
5,Krishna,M
6,Sita,F
7,Gita,f


In [49]:
df['gender']

0    M
1    M
2    m
3    M
4    M
5    M
6    F
7    f
Name: gender, dtype: object

In [52]:
# The 'gender' column mixes upper- and lower-case codes ('M'/'m', 'F'/'f').
# Lowercase all gender values so each category has a single spelling;
# the column is overwritten in place.
df['gender'] = df['gender'].str.lower()

In [53]:
df

Unnamed: 0,name,gender
0,Anish,m
1,Bhuwan,m
2,Dinesh,m
3,Sanjaya,m
4,Arjun,m
5,Krishna,m
6,Sita,f
7,Gita,f


In [55]:
len(df)

8

In [57]:
list(range(8))

[0, 1, 2, 3, 4, 5, 6, 7]

In [58]:
# Add a new column holding 0..n-1. The length is taken from the frame itself
# rather than hard-coding 8, so the assignment still works if the number of
# rows changes.
df['some_val'] = list(range(len(df)))

In [59]:
df

Unnamed: 0,name,gender,some_val
0,Anish,m,0
1,Bhuwan,m,1
2,Dinesh,m,2
3,Sanjaya,m,3
4,Arjun,m,4
5,Krishna,m,5
6,Sita,f,6
7,Gita,f,7


# Deleting a column from a DataFrame
* `pop()` removes the column and returns it
* `del` removes the column without returning anything


In [61]:
df.head(2)

Unnamed: 0,name,gender,some_val
0,Anish,m,0
1,Bhuwan,m,1


In [62]:
# pop() removes 'some_val' from df in place and returns the removed column,
# which is why this cell displays a Series.
df.pop('some_val')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
Name: some_val, dtype: int64

In [63]:
df

Unnamed: 0,name,gender
0,Anish,m
1,Bhuwan,m
2,Dinesh,m
3,Sanjaya,m
4,Arjun,m
5,Krishna,m
6,Sita,f
7,Gita,f


In [64]:
# del drops the 'gender' column from df in place; the statement produces no output.
del df['gender']

In [65]:
df

Unnamed: 0,name
0,Anish
1,Bhuwan
2,Dinesh
3,Sanjaya
4,Arjun
5,Krishna
6,Sita
7,Gita


# Creating a pandas DataFrame from pandas Series

In [66]:
# Two Series with different indexes: 'one' has no entry for label 'd',
# while 'two' covers labels 'a' through 'd'.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([4, 5, 6, 7], index=list('abcd')),
)

In [67]:
df = pd.DataFrame(d)

In [68]:
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,5
c,3.0,6
d,,7


In [69]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [72]:
import numpy as np

In [73]:
type(np.nan)

float

# Checking for missing data in a pandas DataFrame

In [74]:
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,5
c,3.0,6
d,,7


In [75]:
df.isnull()

Unnamed: 0,one,two
a,False,False
b,False,False
c,False,False
d,True,False


In [76]:
df.isnull().sum()

one    1
two    0
dtype: int64

# Writing a pandas DataFrame to a CSV file

In [80]:
# Language popularity table, one key per column.
# The first 'Trend' entry is NaN, i.e. a missing value.
data = {
    'Rank': [1, 2, 3, 4, 5],
    'Language': ['Python', 'Java', 'Javascript', 'C#', 'PHP'],
    'Share': [29.88, 19.05, 8.17, 7.3, 6.15],
    'Trend': [np.nan, -1.8, 0.1, -0.1, -1.0],
}

In [81]:
df = pd.DataFrame(data)

In [82]:
df

Unnamed: 0,Rank,Language,Share,Trend
0,1,Python,29.88,
1,2,Java,19.05,-1.8
2,3,Javascript,8.17,0.1
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [84]:
# Write the frame to 'somedata.csv'; index=False keeps the row labels out of the file.
df.to_csv('somedata.csv', index=False)

# Reading CSV files with pandas

In [3]:
# Use the same relative path the CSV was written to ('somedata.csv') instead of
# the absolute '/content/...' location, which does not exist on other machines.
filepath = 'somedata.csv'

In [4]:
import pandas as pd
# Read the CSV at `filepath` into a DataFrame.
lang = pd.read_csv(filepath)

In [5]:
lang

Unnamed: 0,Rank,Language,Share,Trend
0,1,Python,29.88,
1,2,Java,19.05,-1.8
2,3,Javascript,8.17,0.1
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [13]:
type(lang)

pandas.core.frame.DataFrame

In [6]:
lang.isnull().sum()

Rank        0
Language    0
Share       0
Trend       1
dtype: int64

In [7]:
lang.head(2)

Unnamed: 0,Rank,Language,Share,Trend
0,1,Python,29.88,
1,2,Java,19.05,-1.8


In [8]:
lang.tail(2)

Unnamed: 0,Rank,Language,Share,Trend
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [9]:
lang.columns # displays column names of lang

Index(['Rank', 'Language', 'Share', 'Trend'], dtype='object')

In [10]:
lang.columns[1:]

Index(['Language', 'Share', 'Trend'], dtype='object')

In [11]:
# Re-read the file, loading only the listed columns; 'Rank' is skipped.
usecols = ['Language', 'Share', 'Trend']
lang = pd.read_csv(filepath, usecols=usecols)

In [12]:
lang

Unnamed: 0,Language,Share,Trend
0,Python,29.88,
1,Java,19.05,-1.8
2,Javascript,8.17,0.1
3,C#,7.3,-0.1
4,PHP,6.15,-1.0


# Creating a table in Markdown


| Name | roll_no |
| :---: | :---: |
|kshitiz| 1|
|Ram | 2 |


|Rank|Language|Share|Trend|
|---| :---: |---|---|
|1|Python|29\.88||
|2|Java|19\.05|-1\.8|
|3|Javascript|8\.17|0\.1|
|4|C\#|7\.3|-0\.1|
|5|PHP|6\.15|-1\.0|
