Import Library 

In [1]:
import pandas as pd 
import numpy as np 

# Dictionary 

from python


In [2]:
python_list = [10, 20, 30, 40, 50]
series_from_list = pd.Series(python_list)
print("Series from list:\n", series_from_list)

Series from list:
 0    10
1    20
2    30
3    40
4    50
dtype: int64


From numpy 

In [3]:
numpy_array = np.array([1, 2, 3, 4, 5])
series_from_array = pd.Series(numpy_array)
print("\nSeries from numpy array:\n", series_from_array)


Series from numpy array:
 0    1
1    2
2    3
3    4
4    5
dtype: int32


From Dictionary 

In [4]:
dictionary = {'a': 100, 'b': 200, 'c': 300}
series_from_dict = pd.Series(dictionary)
print("\nSeries from dictionary:\n", series_from_dict)


Series from dictionary:
 a    100
b    200
c    300
dtype: int64


### Questions

Assign a custom index to the Series.

In [5]:
custom_index_series = pd.Series(python_list, index=['A', 'B', 'C', 'D', 'E'])
print("\nSeries with custom index:\n", custom_index_series)



Series with custom index:
 A    10
B    20
C    30
D    40
E    50
dtype: int64


Perform basic arithmetic operations on Series.

In [6]:
series_1 = pd.Series([1, 2, 3, 4, 5])
series_2 = pd.Series([10, 20, 30, 40, 50])

sum_series = series_1 + series_2
print("\nSum of Series:\n", sum_series)

diff_series = series_2 - series_1
print("\nDifference of Series:\n", diff_series)



Sum of Series:
 0    11
1    22
2    33
3    44
4    55
dtype: int64

Difference of Series:
 0     9
1    18
2    27
3    36
4    45
dtype: int64


Access elements using index labels and positions.

In [7]:
print("\nElement at index 'B':", custom_index_series['B'])
print("\nElement at position 2:", custom_index_series.iloc[2])



Element at index 'B': 20

Element at position 2: 30


Filter the Series to include only values greater than a specific threshold.

In [8]:
filtered_series = custom_index_series[custom_index_series > 30]
print("\nFiltered Series (values > 30):\n", filtered_series)



Filtered Series (values > 30):
 D    40
E    50
dtype: int64


Create a DataFrame from a dictionary of lists.

In [9]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df_from_dict = pd.DataFrame(data)
print("\nDataFrame from dictionary of lists:\n", df_from_dict)



DataFrame from dictionary of lists:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


Create a DataFrame from a numpy array, specifying column and index names.


In [10]:
numpy_data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df_from_array = pd.DataFrame(numpy_data, columns=['A', 'B', 'C'], index=['X', 'Y', 'Z'])
print("\nDataFrame from numpy array:\n", df_from_array)



DataFrame from numpy array:
    A  B  C
X  1  2  3
Y  4  5  6
Z  7  8  9


Load a DataFrame from a CSV file.


In [11]:

df_from_csv = pd.read_csv('myfile.csv')
print("\nDataFrame from CSV:\n", df_from_csv)





DataFrame from CSV:
    Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A
2    3  rabia     21       B
3    4   john     18       C


Display the first and last five rows of the DataFrame

In [12]:
print("\nFirst two rows of the DataFrame:\n", df_from_csv.head(2))
print("\nLast two rows of the DataFrame:\n", df_from_csv.tail(2))



First two rows of the DataFrame:
    Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A

Last two rows of the DataFrame:
    Id   Name    Age   Grade 
2    3  rabia     21       B
3    4   john     18       C


Get a summary of the DataFrame including the mean, median, and standard deviation of numeric columns

In [13]:
summary = df_from_csv.describe()
print("\nSummary of DataFrame:\n", summary)


Summary of DataFrame:
             Id        Age 
count  4.000000   4.000000
mean   2.500000  15.750000
std    1.290994   8.616844
min    1.000000   3.000000
25%    1.750000  14.250000
50%    2.500000  19.500000
75%    3.250000  21.000000
max    4.000000  21.000000


In [14]:
mean_values = df_from_csv.mean(numeric_only=True)
median_values = df_from_csv.median(numeric_only=True)
std_dev_values = df_from_csv.std(numeric_only=True)

print("\nMean values:\n", mean_values)
print("\nMedian values:\n", median_values)
print("\nStandard deviation values:\n", std_dev_values)


Mean values:
 Id        2.50
 Age     15.75
dtype: float64

Median values:
 Id        2.5
 Age     19.5
dtype: float64

Standard deviation values:
 Id       1.290994
 Age     8.616844
dtype: float64


Check column names

In [15]:
print("\nColumns in the DataFrame:", df_from_csv.columns)



Columns in the DataFrame: Index(['Id ', ' Name ', ' Age ', ' Grade '], dtype='object')


Extract Column Age From data

In [16]:
df_from_csv = pd.read_csv('myfile.csv')
print("\nDataFrame from CSV file:\n", df_from_csv)

print("\nColumns in the DataFrame:", df_from_csv.columns)

df_from_csv.columns = df_from_csv.columns.str.strip()


for col in df_from_csv.columns:
    print(f"Column name: '{col}'")


try:
    specific_column = df_from_csv['Age']  
    print("\nExtracted 'Age' column as Series:\n", specific_column)
except KeyError as e:
    print(f"\nError: {e}. Column not found in DataFrame.")



DataFrame from CSV file:
    Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A
2    3  rabia     21       B
3    4   john     18       C

Columns in the DataFrame: Index(['Id ', ' Name ', ' Age ', ' Grade '], dtype='object')
Column name: 'Id'
Column name: 'Name'
Column name: 'Age'
Column name: 'Grade'

Extracted 'Age' column as Series:
 0     3
1    21
2    21
3    18
Name: Age, dtype: int64


Filter rows based on column values

In [17]:


df_from_csv = pd.read_csv('myfile.csv')
print("\nDataFrame from CSV file:\n", df_from_csv)

print("\nColumns in the DataFrame:", df_from_csv.columns)

df_from_csv.columns = df_from_csv.columns.str.strip()

for col in df_from_csv.columns:
    print(f"Column name: '{col}'")

try:
    specific_column = df_from_csv['Age']  
    print("\nExtracted 'Age' column as Series:\n", specific_column)
except KeyError as e:
    print(f"\nError: {e}. Column not found in DataFrame.")

threshold = 30  
try:
    filtered_rows = df_from_csv[df_from_csv['Age'] > threshold]
    print(f"\nFiltered rows (Age > {threshold}):\n", filtered_rows)
except KeyError as e:
    print(f"\nError: {e}. Column not found in DataFrame.")




DataFrame from CSV file:
    Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A
2    3  rabia     21       B
3    4   john     18       C

Columns in the DataFrame: Index(['Id ', ' Name ', ' Age ', ' Grade '], dtype='object')
Column name: 'Id'
Column name: 'Name'
Column name: 'Age'
Column name: 'Grade'

Extracted 'Age' column as Series:
 0     3
1    21
2    21
3    18
Name: Age, dtype: int64

Filtered rows (Age > 30):
 Empty DataFrame
Columns: [Id, Name, Age, Grade]
Index: []


Select rows based on multiple conditions

In [18]:
df_from_csv = pd.read_csv('myfile.csv')
print("\nDataFrame from CSV file:\n", df_from_csv)

print("\nColumns in the DataFrame:", df_from_csv.columns)

df_from_csv.columns = df_from_csv.columns.str.strip()

for col in df_from_csv.columns:
    print(f"Column name: '{col}'")

try:
    age_condition = df_from_csv['Age'] > 25  
    city_condition = df_from_csv['Grade'] != 'D'  

    
    filtered_rows_multi = df_from_csv[age_condition & city_condition]
    print("\nFiltered rows based on multiple conditions (Age > 25 and Grade != 'D'):\n", filtered_rows_multi)
except KeyError as e:
    print(f"\nError: {e}. Column not found in DataFrame.")



DataFrame from CSV file:
    Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A
2    3  rabia     21       B
3    4   john     18       C

Columns in the DataFrame: Index(['Id ', ' Name ', ' Age ', ' Grade '], dtype='object')
Column name: 'Id'
Column name: 'Name'
Column name: 'Age'
Column name: 'Grade'

Filtered rows based on multiple conditions (Age > 25 and Grade != 'D'):
 Empty DataFrame
Columns: [Id, Name, Age, Grade]
Index: []


Add a column to Dataframe

In [33]:

df = pd.read_csv('myfile.csv')
subjects = ['Maths', 'Chemistry', 'Physics']
df['Subject'] = subjects * (len(df) // len(subjects)) + subjects[:len(df) % len(subjects)]

print(df)

df.to_csv('updated_file.csv', index=False) 



   Id   Name    Age   Grade     Subject
0    1  jake       3       B      Maths
1    2   jake     21       A  Chemistry
2    3  rabia     21       B    Physics
3    4   john     18       C      Maths


Delete a column

In [34]:
print(df.columns)


Index(['Id ', ' Name ', ' Age ', ' Grade ', 'Subject'], dtype='object')


In [35]:
df = pd.read_csv('myfile.csv')

print("DataFrame columns:")
print(df.columns)

if 'subject' in df.columns:
   
    df.drop(columns=['subject'], inplace=True)
    print("\n'subject' column deleted successfully.")
else:
    print("\n'subject' column not found in DataFrame.")


print("\nDataFrame after deletion:")
print(df)


DataFrame columns:
Index(['Id ', ' Name ', ' Age ', ' Grade '], dtype='object')

'subject' column not found in DataFrame.

DataFrame after deletion:
   Id   Name    Age   Grade 
0    1  jake       3       B
1    2   jake     21       A
2    3  rabia     21       B
3    4   john     18       C


Rename Column

In [36]:

data = {
    'Id': ['001', '002', '003', '004'],
    'Name': ['jake', 'jake', 'rabia', 'john'],
    'Age': [3, 21, 21, 18],
    'Grade': ['B', 'A', 'B', 'C']
}
df = pd.DataFrame(data)


print("Before renaming columns:")
print(df)


df.rename(columns={'Id': 'StudentID', 'Name': 'StudentName', 'Age': 'StudentAge', 'Grade': 'StudentGrade'}, inplace=True)


print("\nAfter renaming columns:")
print(df)


Before renaming columns:
    Id   Name  Age Grade
0  001   jake    3     B
1  002   jake   21     A
2  003  rabia   21     B
3  004   john   18     C

After renaming columns:
  StudentID StudentName  StudentAge StudentGrade
0       001        jake           3            B
1       002        jake          21            A
2       003       rabia          21            B
3       004        john          18            C
