#### Pandas - DataFrame and Series 

Pandas is a powerful data manipulation library in python, widely used for data analysis and data cleaning. It provides two primary data structures.
- A Series is one-Dimensional array- like object
- A DataFrame is a two-Dimensional, Size-Mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [1]:
!pip install pandas




In [37]:
import pandas as pd
data = ['apple',2,3,4,'Mango']
series = pd.Series(data)
print("Series:",series)
print(type(series))

Series: 0    apple
1        2
2        3
3        4
4    Mango
dtype: object
<class 'pandas.core.series.Series'>


In [17]:
## Create a series from dictionary 

data = {'apple':100,'Mango':200,'Guva':80,'Watermelon':25}
series = pd.Series(data)
print("Series:",series)
print(type(series))

Series: apple         100
Mango         200
Guva           80
Watermelon     25
dtype: int64
<class 'pandas.core.series.Series'>


In [18]:
data = [10,20,30,40] 
index = ['a','b','c','d']
series = pd.Series(data, index = index)
print(series)

a    10
b    20
c    30
d    40
dtype: int64


In [24]:
## Dataframe
## create a Dataframe from a dictionary of list

data = {

'Name':['Pradeep','Subbu','Nishith'],
'Age':[24,25,24],
'City':['Bangalore','Pune','Hyderabad']

}

df=pd.DataFrame(data)
print(df)
df

      Name  Age       City
0  Pradeep   24  Bangalore
1    Subbu   25       Pune
2  Nishith   24  Hyderabad


Unnamed: 0,Name,Age,City
0,Pradeep,24,Bangalore
1,Subbu,25,Pune
2,Nishith,24,Hyderabad


In [29]:
import numpy as np
print(np.array(df))
np.shape(df)

[['Pradeep' 24 'Bangalore']
 ['Subbu' 25 'Pune']
 ['Nishith' 24 'Hyderabad']]


(3, 3)

In [None]:
# Creating DataFrame from list of dictionarys

data = [
    {'Name':'Pradeep','Age':24,'City':'Bangalore'},
    {'Name':'subbu','Age':26,'City':'pune'},
    {'Name':'Nishith','Age':24,'City':'Hyderabad'},
]

## Accessing data from DataFrame
df = pd.DataFrame(data)
df


Unnamed: 0,Name,Age,City
0,Pradeep,24,Bangalore
1,subbu,26,pune
2,Nishith,24,Hyderabad


In [51]:
## Accessing data with respect to column Name 
df['Name']

0    Pradeep
1      subbu
2    Nishith
Name: Name, dtype: object

In [54]:
df.loc[0] # returns the first row of the DataFrame
df.loc[1] # returns the second row of the DataFrame

Name    subbu
Age        26
City     pune
Name: 1, dtype: object

In [None]:
## Accessing a specified element 
df.at[1,'Age'] ## [row][column Name]

26

In [60]:
## Accessing a specified element using iat
df.iat[0,0]

'Pradeep'

In [95]:
## Data Manipulation with DataFrames

df['Salary']=['51,000','52,000','53,000']
df

Unnamed: 0,Name,Age,City,Salary
0,Pradeep,24,Bangalore,51000
1,subbu,26,pune,52000
2,Nishith,24,Hyderabad,53000


In [92]:
## Remove a column from a df 
df=df.drop('Salary',axis=1)
df

KeyError: "['Salary'] not found in axis"

In [None]:
## Remove a column from a df for viewing purposes
df.drop('Salary',axis=1) 


Unnamed: 0,Name,Age,City
0,Pradeep,24,Bangalore
1,subbu,26,pune
2,Nishith,24,Hyderabad


In [88]:
df

Unnamed: 0,Name,Age,City,Salary
0,Pradeep,24,Bangalore,51000
1,subbu,26,pune,52000
2,Nishith,24,Hyderabad,53000


In [96]:
# Remove a column permenently
df.drop('Salary', axis=1,inplace=True)
df

Unnamed: 0,Name,Age,City
0,Pradeep,24,Bangalore
1,subbu,26,pune
2,Nishith,24,Hyderabad


In [103]:
## add age to the column
df['Age'] = df['Age']+1        ##  +=
df

Unnamed: 0,Name,Age,City
0,Pradeep,3,Bangalore
1,subbu,3,pune
2,Nishith,3,Hyderabad


In [106]:
## Accessing data from csv file 
df=pd.read_csv('Sales_Data.csv') 
df.head(5) # Displaying the first 5 rows of the data
df.tail(5) # Displaying the last 5 rows of the data


Unnamed: 0.1,Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address,Month,Sales,City,Hour
185945,13617,222905,AAA Batteries (4-pack),1,2.99,2019-06-07 19:02:00,"795 Pine St, Boston, MA 02215",6,2.99,Boston,19
185946,13618,222906,27in FHD Monitor,1,149.99,2019-06-01 19:29:00,"495 North St, New York City, NY 10001",6,149.99,New York City,19
185947,13619,222907,USB-C Charging Cable,1,11.95,2019-06-22 18:57:00,"319 Ridge St, San Francisco, CA 94016",6,11.95,San Francisco,18
185948,13620,222908,USB-C Charging Cable,1,11.95,2019-06-26 18:35:00,"916 Main St, San Francisco, CA 94016",6,11.95,San Francisco,18
185949,13621,222909,AAA Batteries (4-pack),1,2.99,2019-06-25 14:33:00,"209 11th St, Atlanta, GA 30301",6,2.99,Atlanta,14


In [107]:
df

Unnamed: 0.1,Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address,Month,Sales,City,Hour
0,0,295665,Macbook Pro Laptop,1,1700.00,2019-12-30 00:01:00,"136 Church St, New York City, NY 10001",12,1700.00,New York City,0
1,1,295666,LG Washing Machine,1,600.00,2019-12-29 07:03:00,"562 2nd St, New York City, NY 10001",12,600.00,New York City,7
2,2,295667,USB-C Charging Cable,1,11.95,2019-12-12 18:21:00,"277 Main St, New York City, NY 10001",12,11.95,New York City,18
3,3,295668,27in FHD Monitor,1,149.99,2019-12-22 15:13:00,"410 6th St, San Francisco, CA 94016",12,149.99,San Francisco,15
4,4,295669,USB-C Charging Cable,1,11.95,2019-12-18 12:38:00,"43 Hill St, Atlanta, GA 30301",12,11.95,Atlanta,12
...,...,...,...,...,...,...,...,...,...,...,...
185945,13617,222905,AAA Batteries (4-pack),1,2.99,2019-06-07 19:02:00,"795 Pine St, Boston, MA 02215",6,2.99,Boston,19
185946,13618,222906,27in FHD Monitor,1,149.99,2019-06-01 19:29:00,"495 North St, New York City, NY 10001",6,149.99,New York City,19
185947,13619,222907,USB-C Charging Cable,1,11.95,2019-06-22 18:57:00,"319 Ridge St, San Francisco, CA 94016",6,11.95,San Francisco,18
185948,13620,222908,USB-C Charging Cable,1,11.95,2019-06-26 18:35:00,"916 Main St, San Francisco, CA 94016",6,11.95,San Francisco,18


In [112]:
df.describe()

Unnamed: 0.1,Unnamed: 0,Order ID,Quantity Ordered,Price Each,Month,Sales,Hour
count,185950.0,185950.0,185950.0,185950.0,185950.0,185950.0,185950.0
mean,8340.388475,230417.569379,1.124383,184.399735,7.05914,185.490917,14.413305
std,5450.554093,51512.73711,0.442793,332.73133,3.502996,332.919771,5.423416
min,0.0,141234.0,1.0,2.99,1.0,2.99,0.0
25%,3894.0,185831.25,1.0,11.95,4.0,11.95,11.0
50%,7786.0,230367.5,1.0,14.95,7.0,14.95,15.0
75%,11872.0,275035.75,1.0,150.0,10.0,150.0,19.0
max,25116.0,319670.0,9.0,1700.0,12.0,3400.0,23.0
