In [None]:
# Why a DataFrame is important:
# 1 --> Structure: It's perfect for handling structured data. You can easily store
#          information like names, ages, and cities together in a clean, organized format.
# 2 --> Analysis: It provides powerful tools for cleaning, transforming, and analyzing data.
#          You can filter data, handle missing values, and combine different datasets with just
#          a few lines of code.
# 3 --> Flexibility: Each column in a DataFrame is a pandas Series, but the DataFrame
#          allows you to manage all of them together, making complex data manipulation
#          tasks simple and efficient.

In [103]:
import numpy as np
import pandas as pd

In [34]:
# Creating A DataFrame

In [104]:
# From a dictionary of lists: every key becomes a column label and the
# list stored under it supplies that column's values, one per row.
data = dict(
    Name=['Cena', 'Meena', 'Teena', 'Reena'],
    Age=[18, 19, 25, 30],
    City=['Pune', 'Mumbai', 'Nashik', 'Satana'],
    Salary=[50000, 55000, 60000, 65000],
)
df = pd.DataFrame(data=data)

In [105]:
# Display the DataFrame. No index was supplied, so the rows carry the
# default integer labels 0-3.
df

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [106]:
# From a list of lists: each inner list is one row of values.
# No column names are passed, so the columns get the default labels 0, 1, 2, 3.
data_list = [
    ['Cena', 18, 'Pune', 50000],
    ['Meena', 19, 'Mumbai', 55000],
    ['Teena', 25, 'Nashik', 60000],
    ['Reena', 30, 'Satana', 65000],
]
df2 = pd.DataFrame(data=data_list)

In [107]:
# Display the result: the column headers are the positional labels 0-3
# because no column names were given.
df2

Unnamed: 0,0,1,2,3
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [None]:
# We can also create a DataFrame from a list of lists, but then we have to set the column names manually.

In [108]:
# Build the DataFrame from a list of rows. A plain list carries no column
# labels, so they are passed explicitly through the `columns` argument.
data_list = [
    ['Cena', 18, 'Pune', 50000],
    ['Meena', 19, 'Mumbai', 55000],
    ['Teena', 25, 'Nashik', 60000],
    ['Reena', 30, 'Satana', 65000],
]
columns = ["Name", "Age", "City", "Salary"]
# Construct the frame once, with its labels; an unlabeled intermediate
# frame would only be thrown away.
df2 = pd.DataFrame(data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [15]:
# Selection and Indexing of Columns

In [109]:
# Show df2 as the starting point for the column-selection examples.
df2

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [110]:
# Select a single column by its label; the result is a pandas Series.
df2['Name']

0     Cena
1    Meena
2    Teena
3    Reena
Name: Name, dtype: object

In [111]:
# To select several columns, pass a list of column labels inside the
# indexing brackets (hence the double brackets); the result is a DataFrame.
df2[['Name', 'City']]

Unnamed: 0,Name,City
0,Cena,Pune
1,Meena,Mumbai
2,Teena,Nashik
3,Reena,Satana


In [112]:
# Create a new column by assigning to a label that does not exist yet.
# The list supplies one value per row (4 values for the 4 rows here).
# This modifies df2 itself.
df2['Designation'] = ['Doctor', 'Teacher', 'Eng.', 'Teacher']

In [61]:
# Confirm that the new 'Designation' column is now part of df2.
df2


Unnamed: 0,Name,Age,City,Salary,Designation
0,Cena,18,Pune,50000,Doctor
1,Meena,19,Mumbai,55000,Teacher
2,Teena,25,Nashik,60000,Eng.
3,Reena,30,Satana,65000,Teacher


In [68]:
# Removing Columns.

In [69]:
# Axis 0 runs vertically, along the rows.
# axis=0 (or axis='index'): This is the default. It tells the function to operate down the rows. 
# For example, when you drop rows, you're operating on axis=0.


# Axis 1 runs horizontally, along the columns.
# axis=1 (or axis='columns'): This tells the function to operate across the columns. 
# For example, when you drop a column, you're operating on axis=1.

In [113]:
# Drop the 'Designation' column (axis=1 means columns). The result is a new
# DataFrame that is only displayed here; it is not assigned back, so df2
# itself keeps the column.
df2.drop('Designation',axis = 1)

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [114]:
# df2 still contains 'Designation': the drop() call in the previous cell
# returned a new DataFrame and left df2 untouched.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,Cena,18,Pune,50000,Doctor
1,Meena,19,Mumbai,55000,Teacher
2,Teena,25,Nashik,60000,Eng.
3,Reena,30,Satana,65000,Teacher


In [49]:
# 'inplace' 
# 1--> inplace=False (Default): The method does not modify the original DataFrame.
#      Instead, it returns a new DataFrame with the changes. The original DataFrame remains unchanged.
#      This is generally the safer practice because it prevents accidental data loss.
#
# 2--> inplace=True: The method modifies the original DataFrame directly.
#      It does not return a new DataFrame. If you use this, you don't need to re-assign
#      the result to the DataFrame variable (e.g., you don't need df = df.drop(...)).

In [115]:
# inplace=True removes the column from df2 itself, so no reassignment is
# needed. errors='ignore' keeps this cell safe to re-run: with the default
# errors='raise', a second run would fail with KeyError because
# 'Designation' has already been removed.
df2.drop('Designation', axis=1, inplace=True, errors='ignore')
df2

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [116]:
# Remove several columns at once by passing a list of column labels.
# The result is only displayed, not assigned, so df2 is not modified.
df2.drop(['City', 'Name'],axis = 1)

Unnamed: 0,Age,Salary
0,18,50000
1,19,55000
2,25,60000
3,30,65000


In [117]:
# Remove a row by its index label (2) using axis=0.
# The remaining rows keep their original labels (0, 1, 3); they are not renumbered.
df2.drop(2,axis = 0)

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
3,Reena,30,Satana,65000


In [79]:
# Selecting Rows

In [118]:
# df2 is unchanged by the two drop() examples above: their results were
# only displayed and never assigned back.
df2

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [119]:
# Use .loc to select rows by index label.
# A single label returns that row as a Series whose index is the column names.
df2.loc[0]

Name       Cena
Age          18
City       Pune
Salary    50000
Name: 0, dtype: object

In [120]:
# Passing a list of labels to .loc returns those rows as a DataFrame.
df2.loc[[0,3]]

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
3,Reena,30,Satana,65000


In [89]:
# Selecting Subset of rows and columns

# When selecting a subset, always select the rows first and then the columns.

In [123]:
# Select rows 0 and 1 and the columns 'City' and 'Salary' in a single .loc
# call: the row selector comes first, the column selector second. One call
# avoids chained indexing (.loc[...][...]), which builds an intermediate
# DataFrame and is unsafe if later used for assignment.
df2.loc[[0, 1], ['City', 'Salary']]

Unnamed: 0,City,Salary
0,Pune,50000
1,Mumbai,55000


In [91]:
# Conditional Selection

In [124]:
# Show df2 as the starting point for the conditional-selection examples.
df2

Unnamed: 0,Name,Age,City,Salary
0,Cena,18,Pune,50000
1,Meena,19,Mumbai,55000
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [129]:
# Keep only the people whose age is above 24.
# df2['Age'] > 24 produces a True/False value per row; indexing df2 with it
# returns the rows where the value is True.
df2[df2['Age'] > 24]

Unnamed: 0,Name,Age,City,Salary
2,Teena,25,Nashik,60000
3,Reena,30,Satana,65000


In [130]:
# Keep only the people whose age is above 24 AND whose city is 'Satana'.
# Conditions are combined with & (element-wise AND). Each condition needs
# its own parentheses because & binds more tightly than > and ==.
df2[(df2['Age'] > 24) & (df2['City'] == 'Satana')]

Unnamed: 0,Name,Age,City,Salary
3,Reena,30,Satana,65000
