### Pandas: DataFrame and Series
**DataFrame** is a 2-dimensional labeled data structure with columns of potentially different types, similar to a spreadsheet or SQL table.

**Series** is a 1-dimensional labeled array capable of holding any data type, essentially a single column of a DataFrame.

In [1]:
import pandas as pd

In [2]:
# Series example: build a named Series from a plain Python list.
# No index is given, so pandas labels the values 0, 1, 2, ...
series_data = pd.Series(data=[1, 2, 3, 4, 5], name='Numbers')
print("Series:", series_data, sep="\n")

Series:
0    1
1    2
2    3
3    4
4    5
Name: Numbers, dtype: int64


In [3]:
# Create a Series from a dictionary: the keys become the index labels
# and the values become the data.
series_dict = pd.Series(data=dict(a=1, b=2, c=3), name='Letters')
print("Series from dictionary:", series_dict, sep="\n")

Series from dictionary:
a    1
b    2
c    3
Name: Letters, dtype: int64


In [4]:
# Create a Series whose values are paired with explicit labels
# instead of the default integer index.
custom_index_series = pd.Series(
    data=[10, 20, 30],
    index=['x', 'y', 'z'],
    name='CustomIndex',
)
print("Series with custom index:", custom_index_series, sep="\n")

Series with custom index:
x    10
y    20
z    30
Name: CustomIndex, dtype: int64


In [12]:
## DataFrame example
## Create a DataFrame from a dictionary of lists:
## each key becomes a column name and each list supplies that column's values.
data = dict(
    Name=['Muzmmil', 'Jiya', 'Najmin'],
    Age=[25, 30, 35],
    City=['Latur', 'Mumbai', 'Pune'],
)
df = pd.DataFrame(data=data)
print("DataFrame from dictionary:", df, sep="\n")

DataFrame from dictionary:
      Name  Age    City
0  Muzmmil   25   Latur
1     Jiya   30  Mumbai
2   Najmin   35    Pune


In [13]:
# Confirm that the object built by pd.DataFrame(...) is a pandas DataFrame
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [14]:
## Create a DataFrame from a list of dictionaries:
## each dictionary is one row, and its keys name the columns.
data_list = [
    dict(Name='Muzmmil', Age=25, City='Latur'),
    dict(Name='Jiya', Age=30, City='Mumbai'),
    dict(Name='Najmin', Age=35, City='Pune'),
]
df_from_list = pd.DataFrame(data=data_list)
print("DataFrame from list of dictionaries:", df_from_list, sep="\n")

DataFrame from list of dictionaries:
      Name  Age    City
0  Muzmmil   25   Latur
1     Jiya   30  Mumbai
2   Najmin   35    Pune


In [16]:
# Load the sales data from a CSV file. The path is relative, so it is
# resolved against the kernel's current working directory.
sales_df = pd.read_csv('sales.csv')
print("Sales DataFrame:")
# Preview the first five rows rather than rendering the whole table
sales_df.head(n=5)

Sales DataFrame:


Unnamed: 0,Region,Country,Item Type,Sales Channel,Order Priority,Order Date,Order ID,Ship Date,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit
0,Middle East and North Africa,Libya,Cosmetics,Offline,M,10/18/2014,686800706,10/31/2014,8446,437.2,263.33,3692591.2,2224085.18,1468506.02
1,North America,Canada,Vegetables,Online,M,11/7/2011,185941302,12/8/2011,3018,154.06,90.93,464953.08,274426.74,190526.34
2,Middle East and North Africa,Libya,Baby Food,Offline,C,10/31/2016,246222341,12/9/2016,1517,255.28,159.42,387259.76,241840.14,145419.62
3,Asia,Japan,Cereal,Offline,C,4/10/2010,161442649,5/12/2010,3322,205.7,117.11,683335.4,389039.42,294295.98
4,Sub-Saharan Africa,Chad,Fruits,Offline,H,8/16/2011,645713555,8/31/2011,9845,9.33,6.92,91853.85,68127.4,23726.45


In [17]:
# Counterpart to head(): inspect the end of the table
sales_df.tail()  # Display the last five rows of the DataFrame (the default for tail())

Unnamed: 0,Region,Country,Item Type,Sales Channel,Order Priority,Order Date,Order ID,Ship Date,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit
995,Middle East and North Africa,Azerbaijan,Snacks,Offline,C,4/18/2010,534085166,4/25/2010,6524,152.58,97.44,995431.92,635698.56,359733.36
996,Europe,Georgia,Baby Food,Offline,H,8/1/2011,590768182,9/7/2011,288,255.28,159.42,73520.64,45912.96,27607.68
997,Middle East and North Africa,United Arab Emirates,Vegetables,Online,C,5/12/2011,524363124,6/28/2011,9556,154.06,90.93,1472197.36,868927.08,603270.28
998,Europe,Finland,Household,Offline,L,1/25/2016,289606320,2/14/2016,9801,668.27,502.54,6549714.27,4925394.54,1624319.73
999,Europe,Portugal,Cereal,Offline,C,4/10/2014,811546599,5/8/2014,3528,205.7,117.11,725709.6,413164.08,312545.52


In [18]:
# Show the small Name/Age/City DataFrame again before indexing into it
df

Unnamed: 0,Name,Age,City
0,Muzmmil,25,Latur
1,Jiya,30,Mumbai
2,Najmin,35,Pune


In [19]:
# Selecting a single column with [] returns it as a Series, not a DataFrame
df['Name']  # Access a specific column

0    Muzmmil
1       Jiya
2     Najmin
Name: Name, dtype: object

In [20]:
# A single DataFrame column is a pandas Series
type(df['Name'])  # Check the type of the column

pandas.core.series.Series

In [21]:
# .loc selects by index *label*; here the label 0 is also the first row
# because df uses the default 0, 1, 2 integer index.
df.loc[0]  # Access the first row using label-based indexing

Name    Muzmmil
Age          25
City      Latur
Name: 0, dtype: object

In [22]:
# .iloc selects by integer *position*, regardless of what the index labels are
df.iloc[0]  # Access the first row using integer-location based indexing

Name    Muzmmil
Age          25
City      Latur
Name: 0, dtype: object

In [23]:
# Two-step lookup: .iloc[0] yields the first row as a Series, then ['Name'] picks that label from it
df.iloc[0]['Name']  # Access the 'Name' column of the first row

'Muzmmil'

In [24]:
# Read the scalar at row position 0, column position 0 with one positional lookup.
# The previous form, df.iloc[0][0], applied an integer key to a label-indexed row
# Series; pandas flags that positional fallback as deprecated (FutureWarning).
# A single .iloc[row, col] call is explicit about being positional and avoids it.
df.iloc[0, 0]

  df.iloc[0][0]


'Muzmmil'

In [25]:
## Accessing specific rows and columns
# First row, 'Name' column: positional row lookup, then a label lookup on the resulting row Series
first_row_name = df.iloc[0]['Name']
print("First row 'Name' column value:", first_row_name)
# First row, first column: purely positional, done in a single .iloc[row, col] call.
# (df.iloc[0][0] used an integer key on a label-indexed Series, which pandas
# reports as deprecated with a FutureWarning.)
first_row_first_col = df.iloc[0, 0]
print("First row first column value:", first_row_first_col)

First row 'Name' column value: Muzmmil
First row first column value: Muzmmil


  first_row_first_col = df.iloc[0][0]


In [26]:
# .at takes a (row label, column label) pair and returns a single scalar
df.at[0, 'Name']  # Access the 'Name' column of the first row using .at

'Muzmmil'

In [27]:
# Accessing specific rows and columns using .iat
# .iat takes (row position, column position). Position 0 corresponds to 'Name'
# only because 'Name' is the first column of df.
first_row_name_iat = df.iat[0, 0]
print("First row 'Name' column value using .iat:", first_row_name_iat)

First row 'Name' column value using .iat: Muzmmil


In [28]:
## Data Manipulation with DataFrames
# Adding a new column: assigning a list to a new column label creates the column.
# The list supplies one value per existing row, in row order.
df['Salary'] = [50000, 60000, 70000]
print("DataFrame after adding 'Salary' column:", df, sep="\n")

DataFrame after adding 'Salary' column:
      Name  Age    City  Salary
0  Muzmmil   25   Latur   50000
1     Jiya   30  Mumbai   60000
2   Najmin   35    Pune   70000


In [29]:
# Removing a column
# drop() returns a new DataFrame, so rebind df rather than mutating it with inplace=True.
# errors='ignore' keeps this cell safe to re-run: once 'Salary' is gone, a second
# execution is a no-op instead of raising KeyError.
df = df.drop(columns='Salary', errors='ignore')
print("DataFrame after removing 'Salary' column:")
print(df)

DataFrame after removing 'Salary' column:
      Name  Age    City
0  Muzmmil   25   Latur
1     Jiya   30  Mumbai
2   Najmin   35    Pune


In [30]:
# Increment 'Age' by 5 for every row and write the result back into the same column.
# Note: this updates df itself, so each execution of the cell adds another 5.
df['Age'] = df['Age'].add(5)

In [31]:
df

Unnamed: 0,Name,Age,City
0,Muzmmil,30,Latur
1,Jiya,35,Mumbai
2,Najmin,40,Pune


In [32]:
# Switch back to the sales data: preview its first five rows (not the full DataFrame)
sales_df.head()


Unnamed: 0,Region,Country,Item Type,Sales Channel,Order Priority,Order Date,Order ID,Ship Date,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit
0,Middle East and North Africa,Libya,Cosmetics,Offline,M,10/18/2014,686800706,10/31/2014,8446,437.2,263.33,3692591.2,2224085.18,1468506.02
1,North America,Canada,Vegetables,Online,M,11/7/2011,185941302,12/8/2011,3018,154.06,90.93,464953.08,274426.74,190526.34
2,Middle East and North Africa,Libya,Baby Food,Offline,C,10/31/2016,246222341,12/9/2016,1517,255.28,159.42,387259.76,241840.14,145419.62
3,Asia,Japan,Cereal,Offline,C,4/10/2010,161442649,5/12/2010,3322,205.7,117.11,683335.4,389039.42,294295.98
4,Sub-Saharan Africa,Chad,Fruits,Offline,H,8/16/2011,645713555,8/31/2011,9845,9.33,6.92,91853.85,68127.4,23726.45


In [33]:
# Reports count, mean, std, min, quartiles and max; only the numeric columns appear in the result
sales_df.describe()  # Get summary statistics of the DataFrame

Unnamed: 0,Order ID,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,549681300.0,5053.988,262.10684,184.96511,1327322.0,936119.2,391202.6
std,257133400.0,2901.375317,216.02106,175.289311,1486515.0,1162571.0,383640.2
min,102928000.0,13.0,9.33,6.92,2043.25,1416.75,532.61
25%,328074000.0,2420.25,81.73,56.67,281191.9,164931.9,98376.12
50%,556609700.0,5184.0,154.06,97.44,754939.2,464726.1,277226.0
75%,769694500.0,7536.75,421.89,263.33,1733503.0,1141750.0,548456.8
max,995529800.0,9998.0,668.27,524.96,6617210.0,5204978.0,1726181.0
