In [28]:
import pandas as pd

# Part A: Data Creation & Exploration

In [29]:
# Build a five-element integer Series (default 0..4 index) and print it.
values = [78, 85, 90, 66, 72]
e = pd.Series(data=values)
print(e)

0    78
1    85
2    90
3    66
4    72
dtype: int64


In [61]:
# Column-oriented source data for the employee frame.
# The None under 'Experience (Years)' becomes NaN, which makes the column float.
data = {
    'Employe': [
        'John',
        'Priya',
        'Ahmed',
        'Sneha',
    ],
    'Department': [
        'HR',
        'IT',
        'Finance',
        'Marketing',
    ],
    'Experience (Years)': [3, None, 2, 4],
}
sf = pd.DataFrame(data=data)
print(sf)

  Employe Department  Experience (Years)
0    John         HR                 3.0
1   Priya         IT                 NaN
2   Ahmed    Finance                 2.0
3   Sneha  Marketing                 4.0


In [31]:
# Preview only the first two rows.
first_rows = sf.head(n=2)
print(first_rows)

  Employe Department  Experience (Years)
0    John         HR                 3.0
1   Priya         IT                 NaN


In [32]:
# List the column labels of the frame.
column_labels = sf.columns
print(column_labels)

Index(['Employe', 'Department', 'Experience (Years)'], dtype='object')


In [33]:
# Summary statistics; describe() skips missing values when computing them.
summary = sf.describe()
print(summary)

       Experience (Years)
count                 3.0
mean                  3.0
std                   1.0
min                   2.0
25%                   2.5
50%                   3.0
75%                   3.5
max                   4.0


# Part B: File Handling

In [34]:
# Write the frame to CSV, leaving the row index out of the file.
# NOTE(review): the target is an absolute Windows path, so this cell depends
# on that directory existing on the machine running the notebook.
csv_path = r'D:\Sushmitha\Data Science\employees.csv'
sf.to_csv(csv_path, index=False)

In [35]:
# Read the CSV back into its own frame and display *that* frame, so the cell
# output shows what was actually loaded from disk rather than the in-memory
# original it was written from.
sf_csv = pd.read_csv(r'D:\Sushmitha\Data Science\employees.csv')
sf_csv

Unnamed: 0,Employe,Department,Experience (Years)
0,John,HR,3.0
1,Priya,IT,
2,Ahmed,Finance,2.0
3,Sneha,Marketing,4.0


# Part C: Data Selection and Filtering

In [36]:
# Select a single column; the result is a Series named after the column.
employee_names = sf['Employe']
print(employee_names)

0     John
1    Priya
2    Ahmed
3    Sneha
Name: Employe, dtype: object


In [37]:
# Label-based lookup of the row whose index label is 2.
row_two = sf.loc[2]
print(row_two)

Employe                 Ahmed
Department            Finance
Experience (Years)        2.0
Name: 2, dtype: object


In [38]:
# Keep rows with strictly more than three years of experience.
# Rows with missing experience compare as False and are excluded.
more_than_three = sf['Experience (Years)'] > 3
print(sf[more_than_three])

  Employe Department  Experience (Years)
3   Sneha  Marketing                 4.0


In [39]:
# Keep rows with at least three years of experience (inclusive bound).
at_least_three = sf['Experience (Years)'] >= 3
print(sf[at_least_three])

  Employe Department  Experience (Years)
0    John         HR                 3.0
3   Sneha  Marketing                 4.0


In [68]:
# Add a Salary column; the list must have one value per row of `sf`.
salaries = [40000, 55000, 42000, 50000]
sf['Salary'] = salaries

In [69]:
# Bare expression as the last line of the cell: renders the current `sf`.
sf

Unnamed: 0_level_0,Department,Experience (Years),Experience_plus_1,Salary
Employe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,HR,3.0,4.0,40000
Priya,IT,,,55000
Ahmed,Finance,2.0,3.0,42000
Sneha,Marketing,4.0,5.0,50000


In [42]:
# Remove the Department column and rebind `sf` to the result.
# Re-running this cell raises KeyError, since the column is already gone.
sf = sf.drop(columns='Department')

In [43]:
# Bare expression as the last line of the cell: renders the current `sf`.
sf

Unnamed: 0,Employe,Experience (Years),Salary
0,John,3.0,40000
1,Priya,,55000
2,Ahmed,2.0,42000
3,Sneha,4.0,50000


# Part D: Handling Missing Values

In [59]:
# Replace missing experience values with the mean of the column.
# The filled Series is assigned back to the column: calling
# fillna(..., inplace=True) on `sf[...]` is chained assignment, which recent
# pandas versions warn about and which leaves `sf` unchanged when
# Copy-on-Write is enabled.
# This cell requires `sf` to contain the 'Experience (Years)' column.
experience_mean = sf['Experience (Years)'].mean()
sf['Experience (Years)'] = sf['Experience (Years)'].fillna(experience_mean)
sf

KeyError: 'Experience (Years)'

# Part E: Sorting and Grouping

In [45]:
# Sort rows by experience using the default (ascending) order.
by_experience = sf.sort_values(by='Experience (Years)')
print(by_experience)

  Employe  Experience (Years)  Salary
2   Ahmed                 2.0   42000
0    John                 3.0   40000
1   Priya                 3.0   55000
3   Sneha                 4.0   50000


In [66]:
# Sort rows by experience with the ascending order spelled out explicitly.
# Missing values are placed last by default.
by_experience_asc = sf.sort_values(by='Experience (Years)', ascending=True)
print(by_experience_asc)

        Department  Experience (Years)  Experience_plus_1
Employe                                                  
Ahmed      Finance                 2.0                3.0
John            HR                 3.0                4.0
Sneha    Marketing                 4.0                5.0
Priya           IT                 NaN                NaN


In [47]:
# Regional sales records used for the grouping example.
# NOTE: this rebinds both `data` and `sf`; after this cell `sf` holds the
# Region/Sales frame, not the employee frame.
data = {
    'Region': [
        'North', 'South', 'North',
        'East', 'West', 'South',
    ],
    'Sales': [
        12000, 15000, 11000,
        13000, 12500, 14000,
    ],
}
sf = pd.DataFrame(data=data)
sf

Unnamed: 0,Region,Sales
0,North,12000
1,South,15000
2,North,11000
3,East,13000
4,West,12500
5,South,14000


In [48]:
# Mean of Sales within each Region; the result is a Series indexed by Region.
sales_by_region = sf.groupby('Region')['Sales']
average = sales_by_region.mean()
average

Region
East     13000.0
North    11500.0
South    14500.0
West     12500.0
Name: Sales, dtype: float64

# Part F: Merging, Concatenation, and Apply

In [49]:
# Two frames that share the EmpID key, used as inputs for the merge example.
sf1 = pd.DataFrame(
    {
        'EmpID': [1, 2],
        'Name': ['John', 'Priya'],
    }
)
sf2 = pd.DataFrame(
    {
        'EmpID': [1, 2],
        'Bonus': [5000, 7000],
    }
)
# Only the last expression of a cell is rendered, so `sf1` on its own line
# produces no output; `sf2` is what gets displayed.
sf1
sf2

Unnamed: 0,EmpID,Bonus
0,1,5000
1,2,7000


In [50]:
# Join the two frames on their shared EmpID key.
# how='inner' is pd.merge's default and is written out here for clarity.
result = pd.merge(left=sf1, right=sf2, how='inner', on='EmpID')
print(result)

   EmpID   Name  Bonus
0      1   John   5000
1      2  Priya   7000


In [51]:
# Two single-column frames used as inputs for the concatenation example.
# NOTE: this rebinds `sf1` and `sf2`.
sf1 = pd.DataFrame(
    {'X': [10, 20]}
)
sf2 = pd.DataFrame(
    {'X': [30, 40]}
)
# Only the last expression of a cell is rendered, so `sf1` on its own line
# produces no output; `sf2` is what gets displayed.
sf1
sf2

Unnamed: 0,X
0,30
1,40


In [52]:
# Stack the two frames row-wise. Each frame keeps its own index labels, so the
# combined index repeats (0, 1, 0, 1).
frames = [sf1, sf2]
result = pd.concat(frames)
result

Unnamed: 0,X
0,10
1,20
0,30
1,40


In [63]:
# Derive a new column by applying a function to every experience value.
# Missing values stay missing, because NaN + 1 is NaN.
experience = sf["Experience (Years)"]
sf["Experience_plus_1"] = experience.apply(lambda years: years + 1)
sf

Unnamed: 0_level_0,Department,Experience (Years),Experience_plus_1
Employe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John,HR,3.0,4.0
Priya,IT,,
Ahmed,Finance,2.0,3.0
Sneha,Marketing,4.0,5.0


# Part G: Index Manipulation

In [57]:
# Discard the current index and renumber the rows 0..n-1.
# The result is rebound to `sf` instead of using inplace=True, so the cell
# reads as an explicit assignment rather than a hidden mutation of the frame.
sf = sf.reset_index(drop=True)  # Reset index
sf

Unnamed: 0,Region,Sales
0,North,12000
1,South,15000
2,North,11000
3,East,13000
4,West,12500
5,South,14000


In [62]:
# Use the 'Employe' column as the row index; it stops being a regular column.
# The result is rebound to `sf` instead of using inplace=True, so the cell
# reads as an explicit assignment rather than a hidden mutation of the frame.
# Re-running this cell raises KeyError, because 'Employe' is then no longer a
# column.
sf = sf.set_index('Employe')
sf

Unnamed: 0_level_0,Department,Experience (Years)
Employe,Unnamed: 1_level_1,Unnamed: 2_level_1
John,HR,3.0
Priya,IT,
Ahmed,Finance,2.0
Sneha,Marketing,4.0


# Part H: Filtering with Custom Conditions

In [64]:
def filter_data(sf, min_experience=4, min_salary=45000):
    """Return the rows that meet both an experience and a salary threshold.

    Parameters
    ----------
    sf : pandas.DataFrame
        Frame containing 'Experience (Years)' and 'Salary' columns.
    min_experience : float, default 4
        Inclusive lower bound on 'Experience (Years)'.
    min_salary : float, default 45000
        Exclusive lower bound on 'Salary'.

    Returns
    -------
    pandas.DataFrame
        The rows of ``sf`` that satisfy both conditions. Rows whose experience
        is missing (NaN) never match, because comparisons with NaN are False.

    Raises
    ------
    KeyError
        If either required column is absent from ``sf``.
    """
    experienced = sf['Experience (Years)'] >= min_experience
    well_paid = sf['Salary'] > min_salary
    return sf[experienced & well_paid]

In [70]:
# Apply the custom filter to the current frame and print the matching rows.
filtered = filter_data(sf)
print(filtered)

        Department  Experience (Years)  Experience_plus_1  Salary
Employe                                                          
Sneha    Marketing                 4.0                5.0   50000
