## Pandas Introduction :

 Pandas is a powerful and flexible Python library used for data manipulation and analysis. It's built on top of NumPy, providing high-performance data structures and data analysis tools

In [2]:
import pandas as pd 

In [34]:
import pandas as pd
import numpy as np
import faker

# Initialize Faker
fake = faker.Faker()

# Number of rows
num_rows = 250

# Generating data
data = {
    'empid': [fake.unique.random_int(min=1000, max=9999) for _ in range(num_rows)],
    'empname': [fake.name() for _ in range(num_rows)],
    'joining_date': [fake.date_this_decade() for _ in range(num_rows)],
    'empcity': [fake.city() for _ in range(num_rows)]
}

# Create DataFrame
df = pd.DataFrame(data)

# Introduce 30% missing values
nan_indices = np.random.choice(df.index, size=int(num_rows * 0.3), replace=False)
for col in df.columns:
    df.loc[nan_indices, col] = np.nan

# Display the first 10 rows of the DataFrame
print(df.head(10))


    empid             empname joining_date      empcity
0     NaN                 NaN          NaN          NaN
1  2957.0      Brianna Jordan   2023-10-09   Pamelafort
2  2116.0  Rebecca Montgomery   2020-03-31  Johnnyville
3  2577.0     Judith Mitchell   2021-02-09    Ochoaview
4  6632.0           Mary Love   2020-11-13  New Jessica
5  9613.0         Daniel Rowe   2023-08-15  Hallchester
6     NaN                 NaN          NaN          NaN
7  2035.0         John Miller   2023-03-18   West David
8  3513.0           Kyle Wise   2023-11-09  North Emily
9  9615.0          Brian Kemp   2020-09-28     Lisaview


In [4]:
print(df.to_string())

      empid                       empname joining_date                 empcity
0       NaN                           NaN          NaN                     NaN
1       NaN                           NaN          NaN                     NaN
2    1362.0                      Brad Day   2024-02-13              Walkertown
3    7314.0               Theresa Jackson   2022-05-21          West Caseystad
4       NaN                           NaN          NaN                     NaN
5    3872.0                     Lisa Lowe   2022-03-24       New Christinaport
6    2154.0                  Jeffrey Gill   2023-04-25              East Sarah
7    8600.0                 Rebecca Brown   2020-09-23      South Davidchester
8    9459.0                 Hector Juarez   2022-05-20              Julianbury
9    2881.0                  Julia Wilson   2023-11-14              Ramosshire
10   9037.0               Joshua Guerrero   2023-05-23               Moyerberg
11      NaN                           NaN          N

In [5]:
# Creating a DataFrame
data = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 24, 35, 32],
    'City': ['New York', 'Paris', 'Berlin', 'London']
}
df1 = pd.DataFrame(data)
print(df1)


    Name  Age      City
0   John   28  New York
1   Anna   24     Paris
2  Peter   35    Berlin
3  Linda   32    London


# Basic Operation in pandas

### Viewing Data:
head(n): Returns the first n rows.

tail(n): Returns the last n rows.

info(): Provides a summary of the DataFrame.

describe(): Generates descriptive statistics.

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   empid         175 non-null    float64
 1   empname       175 non-null    object 
 2   joining_date  175 non-null    object 
 3   empcity       175 non-null    object 
dtypes: float64(1), object(3)
memory usage: 7.9+ KB


In [7]:
df.head()

Unnamed: 0,empid,empname,joining_date,empcity
0,,,,
1,,,,
2,1362.0,Brad Day,2024-02-13,Walkertown
3,7314.0,Theresa Jackson,2022-05-21,West Caseystad
4,,,,


In [8]:
df.head(6)# parameterize head

Unnamed: 0,empid,empname,joining_date,empcity
0,,,,
1,,,,
2,1362.0,Brad Day,2024-02-13,Walkertown
3,7314.0,Theresa Jackson,2022-05-21,West Caseystad
4,,,,
5,3872.0,Lisa Lowe,2022-03-24,New Christinaport


In [9]:
df.tail()

Unnamed: 0,empid,empname,joining_date,empcity
245,2103.0,Danny Larson,2021-12-27,Fryland
246,2605.0,Stacy Gonzalez,2023-02-22,East Victoriatown
247,9967.0,Nathan Hull,2023-03-03,Torresview
248,2445.0,Benjamin Johnson,2022-09-03,Feliciafort
249,2376.0,Jeffery Barry,2024-02-18,South Michaelstad


In [10]:
df.tail(6)#parameterize tail

Unnamed: 0,empid,empname,joining_date,empcity
244,,,,
245,2103.0,Danny Larson,2021-12-27,Fryland
246,2605.0,Stacy Gonzalez,2023-02-22,East Victoriatown
247,9967.0,Nathan Hull,2023-03-03,Torresview
248,2445.0,Benjamin Johnson,2022-09-03,Feliciafort
249,2376.0,Jeffery Barry,2024-02-18,South Michaelstad


### Indexing and Selecting Data
Selecting a Column

You can select a column as a Series:

In [12]:
print(df[["empid","empcity"]])

      empid            empcity
0       NaN                NaN
1       NaN                NaN
2    1362.0         Walkertown
3    7314.0     West Caseystad
4       NaN                NaN
..      ...                ...
245  2103.0            Fryland
246  2605.0  East Victoriatown
247  9967.0         Torresview
248  2445.0        Feliciafort
249  2376.0  South Michaelstad

[250 rows x 2 columns]


In [15]:
print(df[["empid","empcity"]])

      empid            empcity
0       NaN                NaN
1       NaN                NaN
2    1362.0         Walkertown
3    7314.0     West Caseystad
4       NaN                NaN
..      ...                ...
245  2103.0            Fryland
246  2605.0  East Victoriatown
247  9967.0         Torresview
248  2445.0        Feliciafort
249  2376.0  South Michaelstad

[250 rows x 2 columns]


### Selecting Rows:
iloc[]: Purely integer-location based indexing for selection by position.
    
loc[]: Access a group of rows and columns by labels or a boolean array.

In [16]:
df["empid"].iloc[1]

nan

In [17]:
df["empid"].iloc[2]

1362.0

In [18]:
df["empcity"].iloc[2]

'Walkertown'

In [19]:
df["empcity"].iloc[3]

'West Caseystad'

In [21]:
df['empid'].loc[2]

1362.0

In [22]:
df["empcity"].loc[3]

'West Caseystad'

### Selecting Multiple Columns
You can select multiple columns by passing a list of column names:

In [None]:
 print(df[["empid","empcity"]].to_string())

# Data Manipulation
Adding a New Column

You can add a new column by assigning a Series to a new column name:

In [28]:
df1["empsalary"]=[1,2,3,4]

In [29]:
df1

Unnamed: 0,Name,Age,City,empsalary
0,John,28,New York,1
1,Anna,24,Paris,2
2,Peter,35,Berlin,3
3,Linda,32,London,4


In [32]:
df1=df1.drop("empsalary",axis=1)

In [33]:
df1

Unnamed: 0,Name,Age,City
0,John,28,New York
1,Anna,24,Paris
2,Peter,35,Berlin
3,Linda,32,London


In [35]:
adults=df1[df1["Age"]>25]#Filteration of data
print(adults)

    Name  Age      City
0   John   28  New York
2  Peter   35    Berlin
3  Linda   32    London


In [38]:
adults=df1[df1["Name"]=="John"]
print(adults)

   Name  Age      City
0  John   28  New York


### Handling Missing Data
Pandas provides several methods to handle missing data:

isnull(): Detect missing values.
    
dropna(): Remove missing values.
    
fillna(): Fill missing values.

In [41]:
df.isnull()

Unnamed: 0,empid,empname,joining_date,empcity
0,True,True,True,True
1,True,True,True,True
2,False,False,False,False
3,False,False,False,False
4,True,True,True,True
...,...,...,...,...
245,False,False,False,False
246,False,False,False,False
247,False,False,False,False
248,False,False,False,False


In [43]:
df.isna().sum()

empid           75
empname         75
joining_date    75
empcity         75
dtype: int64

In [45]:
df1.loc[1, 'Age'] = None  # Introducing a missing value
df1 = df1.dropna()  # Drop rows with missing values
print(df1)

# Fill missing values
df1['Age'].fillna(df1['Age'].mean(), inplace=True)
print(df1)


    Name   Age      City
0   John  28.0  New York
2  Peter  35.0    Berlin
3  Linda  32.0    London
    Name   Age      City
0   John  28.0  New York
2  Peter  35.0    Berlin
3  Linda  32.0    London


In [2]:
import pandas as pd
import numpy as np
import faker

# Initialize Faker
fake = faker.Faker()

# Number of rows
num_rows = 250

# Generating data
data = {
    'empid': [fake.unique.random_int(min=1000, max=9999) for _ in range(num_rows)],
    'empname': [fake.name() for _ in range(num_rows)],
    'joining_date': [fake.date_this_decade() for _ in range(num_rows)],
    'empcity': [fake.city() for _ in range(num_rows)]
}

# Create DataFrame
df = pd.DataFrame(data)

# Introduce 30% missing values
nan_indices = np.random.choice(df.index, size=int(num_rows * 0.3), replace=False)
for col in df.columns:
    df.loc[nan_indices, col] = np.nan

# Display the first 10 rows of the DataFrame
print(df.head(10))


    empid           empname joining_date          empcity
0  3717.0      Jenna Jensen   2020-05-15      Weaverhaven
1     NaN               NaN          NaN              NaN
2     NaN               NaN          NaN              NaN
3     NaN               NaN          NaN              NaN
4  8667.0     John Phillips   2021-02-03      New Brandon
5  8688.0   Nathan Mccarthy   2021-05-21  New Patrickstad
6  8349.0  Brandon Richards   2021-12-27      Lake Alicia
7  5104.0         Ryan Shaw   2020-08-10       Smithville
8  5171.0    Jennifer Brown   2022-10-03   Carpentermouth
9  5779.0  Courtney Elliott   2020-09-05   Michaelborough


In [3]:
df

Unnamed: 0,empid,empname,joining_date,empcity
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven
1,,,,
2,,,,
3,,,,
4,8667.0,John Phillips,2021-02-03,New Brandon
...,...,...,...,...
245,2280.0,Marc Rasmussen,2020-02-25,Fisherbury
246,1577.0,Scott Herrera,2021-08-03,North Caitlinside
247,5651.0,Stephen Robinson,2021-03-04,Tinahaven
248,6412.0,Denise Bradford,2020-04-29,Barrettton


### Viewing of Data 

In [4]:
df.head()

Unnamed: 0,empid,empname,joining_date,empcity
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven
1,,,,
2,,,,
3,,,,
4,8667.0,John Phillips,2021-02-03,New Brandon


In [5]:
df.head(3)#parameterized 

Unnamed: 0,empid,empname,joining_date,empcity
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven
1,,,,
2,,,,


In [6]:
# Generating descriptive statistics
print(df.describe(include='all'))


              empid       empname joining_date      empcity
count    175.000000           175          175          175
unique          NaN           175          167          175
top             NaN  Jenna Jensen   2023-08-12  Weaverhaven
freq            NaN             1            2            1
mean    5580.508571           NaN          NaN          NaN
std     2545.019172           NaN          NaN          NaN
min     1018.000000           NaN          NaN          NaN
25%     3464.000000           NaN          NaN          NaN
50%     5592.000000           NaN          NaN          NaN
75%     7730.000000           NaN          NaN          NaN
max     9985.000000           NaN          NaN          NaN


In [7]:
print(df["empid"].head())

0    3717.0
1       NaN
2       NaN
3       NaN
4    8667.0
Name: empid, dtype: float64


In [8]:
print(df["empname"].tail())

245      Marc Rasmussen
246       Scott Herrera
247    Stephen Robinson
248     Denise Bradford
249     Margaret Harris
Name: empname, dtype: object


In [9]:
print(df[["empid","empcity"]].head())

    empid      empcity
0  3717.0  Weaverhaven
1     NaN          NaN
2     NaN          NaN
3     NaN          NaN
4  8667.0  New Brandon


In [10]:
print(df[["joining_date","empname"]].tail())

    joining_date           empname
245   2020-02-25    Marc Rasmussen
246   2021-08-03     Scott Herrera
247   2021-03-04  Stephen Robinson
248   2020-04-29   Denise Bradford
249   2022-05-07   Margaret Harris


In [12]:
df.loc[0:5]

Unnamed: 0,empid,empname,joining_date,empcity
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven
1,,,,
2,,,,
3,,,,
4,8667.0,John Phillips,2021-02-03,New Brandon
5,8688.0,Nathan Mccarthy,2021-05-21,New Patrickstad


In [13]:
df.loc[12:15]# gives one less value so always give extra value

Unnamed: 0,empid,empname,joining_date,empcity
12,,,,
13,6567.0,Frank Hall,2024-02-11,Scotttown
14,4884.0,Brian Boone,2024-05-03,West Selenaburgh
15,,,,


In [14]:
df.iloc[12:18]

Unnamed: 0,empid,empname,joining_date,empcity
12,,,,
13,6567.0,Frank Hall,2024-02-11,Scotttown
14,4884.0,Brian Boone,2024-05-03,West Selenaburgh
15,,,,
16,6514.0,Gary Lindsey,2024-02-20,Clarkhaven
17,9713.0,Thomas Schneider,2022-04-08,New Sandrafort


In [15]:
df['salary'] = np.random.randint(30000, 120000, size=num_rows)

In [16]:
df

Unnamed: 0,empid,empname,joining_date,empcity,salary
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven,47213
1,,,,,111092
2,,,,,101775
3,,,,,52193
4,8667.0,John Phillips,2021-02-03,New Brandon,71639
...,...,...,...,...,...
245,2280.0,Marc Rasmussen,2020-02-25,Fisherbury,73575
246,1577.0,Scott Herrera,2021-08-03,North Caitlinside,79049
247,5651.0,Stephen Robinson,2021-03-04,Tinahaven,100891
248,6412.0,Denise Bradford,2020-04-29,Barrettton,67814


In [17]:
df["Bonus"]=np.random.randint(5000,7500,size=num_rows)

In [18]:
df

Unnamed: 0,empid,empname,joining_date,empcity,salary,Bonus
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven,47213,7041
1,,,,,111092,5152
2,,,,,101775,5155
3,,,,,52193,6501
4,8667.0,John Phillips,2021-02-03,New Brandon,71639,5381
...,...,...,...,...,...,...
245,2280.0,Marc Rasmussen,2020-02-25,Fisherbury,73575,6201
246,1577.0,Scott Herrera,2021-08-03,North Caitlinside,79049,7419
247,5651.0,Stephen Robinson,2021-03-04,Tinahaven,100891,6808
248,6412.0,Denise Bradford,2020-04-29,Barrettton,67814,6062


In [20]:
df=df.drop("salary",axis=1)

In [21]:
df

Unnamed: 0,empid,empname,joining_date,empcity,Bonus
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven,7041
1,,,,,5152
2,,,,,5155
3,,,,,6501
4,8667.0,John Phillips,2021-02-03,New Brandon,5381
...,...,...,...,...,...
245,2280.0,Marc Rasmussen,2020-02-25,Fisherbury,6201
246,1577.0,Scott Herrera,2021-08-03,North Caitlinside,7419
247,5651.0,Stephen Robinson,2021-03-04,Tinahaven,6808
248,6412.0,Denise Bradford,2020-04-29,Barrettton,6062


In [22]:
df=df.drop("Bonus",axis=1)

In [23]:
print(df)

      empid           empname joining_date            empcity
0    3717.0      Jenna Jensen   2020-05-15        Weaverhaven
1       NaN               NaN          NaN                NaN
2       NaN               NaN          NaN                NaN
3       NaN               NaN          NaN                NaN
4    8667.0     John Phillips   2021-02-03        New Brandon
..      ...               ...          ...                ...
245  2280.0    Marc Rasmussen   2020-02-25         Fisherbury
246  1577.0     Scott Herrera   2021-08-03  North Caitlinside
247  5651.0  Stephen Robinson   2021-03-04          Tinahaven
248  6412.0   Denise Bradford   2020-04-29         Barrettton
249  9656.0   Margaret Harris   2022-05-07    Port Jacqueline

[250 rows x 4 columns]


In [26]:
print(df["empcity"].to_string())

0               Weaverhaven
1                       NaN
2                       NaN
3                       NaN
4               New Brandon
5           New Patrickstad
6               Lake Alicia
7                Smithville
8            Carpentermouth
9            Michaelborough
10            Rodriguezland
11          Lake Bonnieport
12                      NaN
13                Scotttown
14         West Selenaburgh
15                      NaN
16               Clarkhaven
17           New Sandrafort
18               Lake Kevin
19                Jamesfort
20                New Angel
21             Lake Jessica
22                      NaN
23        Port Victorialand
24                East Alan
25                      NaN
26                Kevinbury
27                      NaN
28                      NaN
29                Whitestad
30                 Johnbury
31                Traviston
32                      NaN
33                      NaN
34          East Jacobhaven
35             Lake 

In [27]:
fdf=df[df["empcity"]=="Edwardstad"]

In [28]:
fdf

Unnamed: 0,empid,empname,joining_date,empcity
180,4453.0,Cynthia Santos,2023-05-10,Edwardstad


In [31]:
fdf1=df[df["empcity"]=="Diazton"]
fdf1

Unnamed: 0,empid,empname,joining_date,empcity
178,8148.0,Samuel Wright,2023-07-23,Diazton


In [32]:
df.isnull().sum()

empid           75
empname         75
joining_date    75
empcity         75
dtype: int64

In [33]:
df.dropna()

Unnamed: 0,empid,empname,joining_date,empcity
0,3717.0,Jenna Jensen,2020-05-15,Weaverhaven
4,8667.0,John Phillips,2021-02-03,New Brandon
5,8688.0,Nathan Mccarthy,2021-05-21,New Patrickstad
6,8349.0,Brandon Richards,2021-12-27,Lake Alicia
7,5104.0,Ryan Shaw,2020-08-10,Smithville
...,...,...,...,...
245,2280.0,Marc Rasmussen,2020-02-25,Fisherbury
246,1577.0,Scott Herrera,2021-08-03,North Caitlinside
247,5651.0,Stephen Robinson,2021-03-04,Tinahaven
248,6412.0,Denise Bradford,2020-04-29,Barrettton


In [35]:
df

Unnamed: 0,empid,empname,joining_date,empcity
0,,,,
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview
4,6632.0,Mary Love,2020-11-13,New Jessica
...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough
246,2787.0,Derek Jarvis,2024-04-27,Laurietown
247,,,,
248,5223.0,Patty Greer,2020-03-02,Carrstad


In [41]:
df.fillna("unknown",inplace=True)

In [42]:
df

Unnamed: 0,empid,empname,joining_date,empcity
0,unknowm,unknowm,unknowm,unknowm
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview
4,6632.0,Mary Love,2020-11-13,New Jessica
...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough
246,2787.0,Derek Jarvis,2024-04-27,Laurietown
247,unknowm,unknowm,unknowm,unknowm
248,5223.0,Patty Greer,2020-03-02,Carrstad


In [44]:
null_id_rows = df['empid'].isnull()

In [45]:
serial_numbers = range(1, null_id_rows.sum() + 1)

In [48]:
df.loc[null_id_rows, 'empid'] = serial_numbers

In [47]:
df

Unnamed: 0,empid,empname,joining_date,empcity,id
0,unknowm,unknowm,unknowm,unknowm,
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort,
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville,
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview,
4,6632.0,Mary Love,2020-11-13,New Jessica,
...,...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough,
246,2787.0,Derek Jarvis,2024-04-27,Laurietown,
247,unknowm,unknowm,unknowm,unknowm,
248,5223.0,Patty Greer,2020-03-02,Carrstad,


In [49]:
df.drop("id",axis=1)

Unnamed: 0,empid,empname,joining_date,empcity
0,unknowm,unknowm,unknowm,unknowm
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview
4,6632.0,Mary Love,2020-11-13,New Jessica
...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough
246,2787.0,Derek Jarvis,2024-04-27,Laurietown
247,unknowm,unknowm,unknowm,unknowm
248,5223.0,Patty Greer,2020-03-02,Carrstad


In [50]:
# Writing DataFrame to CSV
df.to_csv('employees.csv', index=False)

# Reading DataFrame from CSV
df_from_csv = pd.read_csv('employees.csv')
print(df_from_csv.head())


     empid             empname joining_date      empcity  id
0  unknowm             unknowm      unknowm      unknowm NaN
1   2957.0      Brianna Jordan   2023-10-09   Pamelafort NaN
2   2116.0  Rebecca Montgomery   2020-03-31  Johnnyville NaN
3   2577.0     Judith Mitchell   2021-02-09    Ochoaview NaN
4   6632.0           Mary Love   2020-11-13  New Jessica NaN


In [51]:
df["Experience"]=np.random.randint(0,1,size=num_rows)

In [52]:
df

Unnamed: 0,empid,empname,joining_date,empcity,id,Experience
0,unknowm,unknowm,unknowm,unknowm,,0
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort,,0
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville,,0
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview,,0
4,6632.0,Mary Love,2020-11-13,New Jessica,,0
...,...,...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough,,0
246,2787.0,Derek Jarvis,2024-04-27,Laurietown,,0
247,unknowm,unknowm,unknowm,unknowm,,0
248,5223.0,Patty Greer,2020-03-02,Carrstad,,0


In [58]:
df1=df[df["Experience"]==0]

In [56]:
df1

Unnamed: 0,empid,empname,joining_date,empcity,id,Experience
0,unknowm,unknowm,unknowm,unknowm,,0
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort,,0
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville,,0
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview,,0
4,6632.0,Mary Love,2020-11-13,New Jessica,,0
...,...,...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough,,0
246,2787.0,Derek Jarvis,2024-04-27,Laurietown,,0
247,unknowm,unknowm,unknowm,unknowm,,0
248,5223.0,Patty Greer,2020-03-02,Carrstad,,0


In [59]:
df.drop("Experience",axis=1)

Unnamed: 0,empid,empname,joining_date,empcity,id
0,unknowm,unknowm,unknowm,unknowm,
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort,
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville,
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview,
4,6632.0,Mary Love,2020-11-13,New Jessica,
...,...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough,
246,2787.0,Derek Jarvis,2024-04-27,Laurietown,
247,unknowm,unknowm,unknowm,unknowm,
248,5223.0,Patty Greer,2020-03-02,Carrstad,


In [60]:
df["Experience"]=np.random.randint(2,3,size=num_rows)

In [61]:
df

Unnamed: 0,empid,empname,joining_date,empcity,id,Experience
0,unknowm,unknowm,unknowm,unknowm,,2
1,2957.0,Brianna Jordan,2023-10-09,Pamelafort,,2
2,2116.0,Rebecca Montgomery,2020-03-31,Johnnyville,,2
3,2577.0,Judith Mitchell,2021-02-09,Ochoaview,,2
4,6632.0,Mary Love,2020-11-13,New Jessica,,2
...,...,...,...,...,...,...
245,9433.0,David Campos,2022-04-24,Rodriguezborough,,2
246,2787.0,Derek Jarvis,2024-04-27,Laurietown,,2
247,unknowm,unknowm,unknowm,unknowm,,2
248,5223.0,Patty Greer,2020-03-02,Carrstad,,2


In [65]:
df1=df[df["Experience"]==3]

In [66]:
df1

Unnamed: 0,empid,empname,joining_date,empcity,id,Experience
