# Pandas
### Pandas is a fast, powerful, flexible, and easy-to-use open-source data analysis and manipulation tool built on top of the Python programming language.
#### It is often used for data wrangling, cleaning, and analysis tasks.
#### It provides data structures like Series and DataFrame, which are designed to handle structured data efficiently.

### Series vs. DataFrame
#### Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.).
#### DataFrame is a two-dimensional labeled data structure with columns of potentially different types. It is similar to a spreadsheet or SQL table, or a dictionary of Series objects.
#### DataFrames are generally the most commonly used pandas object.

In [2]:
import pandas as pd

data = [1, 2, 3, 4, 5]
series = pd.Series(data)
print(series)
print(type(series))

0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [3]:
# Create a series from a dictionary
data_dict = {'a': 1, 'b': 2, 'c': 3}
series_dict = pd.Series(data_dict)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [6]:
data = [10, 20, 30, 40, 50]
index = ['a', 'b', 'c', 'd', 'e']
pd.Series(data, index=index)

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [7]:
# Data frame
# Create a dataframe from a dictionary of list

data = {
    'Name': ['Steph', 'Max', 'Chloe'],
    'Age': [21, 20, 22],
    'City': ['Seattle', 'New York', 'Denver']
}

df = pd.DataFrame(data)
print(df)
print(type(df))

    Name  Age      City
0  Steph   21   Seattle
1    Max   20  New York
2  Chloe   22    Denver
<class 'pandas.core.frame.DataFrame'>


In [8]:
import numpy as np
np.array(df)

array([['Steph', 21, 'Seattle'],
       ['Max', 20, 'New York'],
       ['Chloe', 22, 'Denver']], dtype=object)

In [9]:
data = [
    {'Name': 'Steph', 'Age': 22, 'City': 'Denver'},
    {'Name': 'Max', 'Age': 18, 'City': 'Seattle'},
    {'Name': 'Chloe', 'Age': 20, 'City': 'New York'}
]

df = pd.DataFrame(data)
print(df)
print(type(df))

    Name  Age      City
0  Steph   22    Denver
1    Max   18   Seattle
2  Chloe   20  New York
<class 'pandas.core.frame.DataFrame'>


In [10]:
df = pd.read_csv("./Details.csv")
print(df.head())

  Order ID  Amount  Profit  Quantity     Category      Sub-Category  \
0  B-25681    1096     658         7  Electronics  Electronic Games   
1  B-26055    5729      64        14    Furniture            Chairs   
2  B-25955    2927     146         8    Furniture         Bookcases   
3  B-26093    2847     712         8  Electronics          Printers   
4  B-25602    2617    1151         4  Electronics            Phones   

   PaymentMode  
0          COD  
1          EMI  
2          EMI  
3  Credit Card  
4  Credit Card  


In [11]:
df.tail(5)

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI
1497,B-25973,4141,1698,13,Electronics,Printers,COD
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD
1499,B-25993,4363,305,5,Furniture,Tables,EMI


In [13]:
# Accessing the data from the dataframe
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
0,B-25681,1096,658,7,Electronics,Electronic Games,COD
1,B-26055,5729,64,14,Furniture,Chairs,EMI
2,B-25955,2927,146,8,Furniture,Bookcases,EMI
3,B-26093,2847,712,8,Electronics,Printers,Credit Card
4,B-25602,2617,1151,4,Electronics,Phones,Credit Card
...,...,...,...,...,...,...,...
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI
1497,B-25973,4141,1698,13,Electronics,Printers,COD
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD


In [14]:
df['Category']

0       Electronics
1         Furniture
2         Furniture
3       Electronics
4       Electronics
           ...     
1495       Clothing
1496       Clothing
1497    Electronics
1498       Clothing
1499      Furniture
Name: Category, Length: 1500, dtype: object

In [15]:
type(df['Category'])

pandas.core.series.Series

In [18]:
df.loc[0][0]

  df.loc[0][0]


'B-25681'

In [17]:
df.iloc[0]

Order ID                 B-25681
Amount                      1096
Profit                       658
Quantity                       7
Category             Electronics
Sub-Category    Electronic Games
PaymentMode                  COD
Name: 0, dtype: object

In [21]:
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode
0,B-25681,1096,658,7,Electronics,Electronic Games,COD
1,B-26055,5729,64,14,Furniture,Chairs,EMI
2,B-25955,2927,146,8,Furniture,Bookcases,EMI
3,B-26093,2847,712,8,Electronics,Printers,Credit Card
4,B-25602,2617,1151,4,Electronics,Phones,Credit Card
...,...,...,...,...,...,...,...
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI
1497,B-25973,4141,1698,13,Electronics,Printers,COD
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD


In [20]:
# Accessing a specified element
df.at[1, 'Sub-Category']

'Chairs'

In [22]:
df.at[2, 'Amount']

np.int64(2927)

In [23]:
# Accessing a speciified element usng iat
df.iat[2, 2]

np.int64(146)

In [25]:
# Data manipulation with dataframes
df['Shipped'] = np.random.choice([True, False], size=1500)

In [26]:
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,PaymentMode,Shipped
0,B-25681,1096,658,7,Electronics,Electronic Games,COD,True
1,B-26055,5729,64,14,Furniture,Chairs,EMI,True
2,B-25955,2927,146,8,Furniture,Bookcases,EMI,True
3,B-26093,2847,712,8,Electronics,Printers,Credit Card,False
4,B-25602,2617,1151,4,Electronics,Phones,Credit Card,True
...,...,...,...,...,...,...,...,...
1495,B-25700,7,-3,2,Clothing,Hankerchief,COD,True
1496,B-25757,3151,-35,7,Clothing,Trousers,EMI,False
1497,B-25973,4141,1698,13,Electronics,Printers,COD,False
1498,B-25698,7,-2,1,Clothing,Hankerchief,COD,True


In [31]:
df.drop('PaymentMode', axis=1, inplace=True) #Inplace makes it permanent

KeyError: "['PaymentMode'] not found in axis"

In [32]:
df['Quantity'] =df['Quantity']+1

In [33]:
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,Shipped
0,B-25681,1096,658,8,Electronics,Electronic Games,True
1,B-26055,5729,64,15,Furniture,Chairs,True
2,B-25955,2927,146,9,Furniture,Bookcases,True
3,B-26093,2847,712,9,Electronics,Printers,False
4,B-25602,2617,1151,5,Electronics,Phones,True
...,...,...,...,...,...,...,...
1495,B-25700,7,-3,3,Clothing,Hankerchief,True
1496,B-25757,3151,-35,8,Clothing,Trousers,False
1497,B-25973,4141,1698,14,Electronics,Printers,False
1498,B-25698,7,-2,2,Clothing,Hankerchief,True


In [35]:
df.drop(0, inplace=True)

In [36]:
df

Unnamed: 0,Order ID,Amount,Profit,Quantity,Category,Sub-Category,Shipped
1,B-26055,5729,64,15,Furniture,Chairs,True
2,B-25955,2927,146,9,Furniture,Bookcases,True
3,B-26093,2847,712,9,Electronics,Printers,False
4,B-25602,2617,1151,5,Electronics,Phones,True
5,B-25881,2244,247,5,Clothing,Trousers,True
...,...,...,...,...,...,...,...
1495,B-25700,7,-3,3,Clothing,Hankerchief,True
1496,B-25757,3151,-35,8,Clothing,Trousers,False
1497,B-25973,4141,1698,14,Electronics,Printers,False
1498,B-25698,7,-2,2,Clothing,Hankerchief,True


In [37]:
df.describe()

Unnamed: 0,Amount,Profit,Quantity
count,1499.0,1499.0,1499.0
mean,291.310874,24.21948,4.741161
std,461.611117,167.818579,2.18405
min,4.0,-1981.0,2.0
25%,47.5,-12.0,3.0
50%,122.0,8.0,4.0
75%,325.5,38.0,6.0
max,5729.0,1864.0,15.0


In [None]:
df.dtypes

Order ID        object
Amount           int64
Profit           int64
Quantity         int64
Category        object
Sub-Category    object
Shipped           bool
dtype: object

: 