In [9]:
import pandas as pd

# Creating a DataFrame
# The dictionary keys ('Name', 'Age') become the column labels, and each list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
)
# pd.DataFrame() converts the mapping into a four-row, two-column table,
# which is bound to df for the steps that follow.
df = pd.DataFrame(data=data)

# Reading data from a CSV file
# pd.read_csv() parses the file at the given path (relative to the current
# working directory) and returns its contents as a DataFrame bound to
# df_from_csv.
try:
    df_from_csv = pd.read_csv('data.csv')
except FileNotFoundError:
    # Without this guard a missing 'data.csv' aborts the whole cell before
    # any of the later demonstrations run. None of the visible steps use
    # df_from_csv, so an empty frame is a safe stand-in.
    print("'data.csv' was not found; df_from_csv is an empty DataFrame.")
    df_from_csv = pd.DataFrame()

# Displaying the DataFrame
# Writes a heading line followed by the plain-text rendering of the whole
# frame; nothing is assigned or modified.
print("DataFrame:", df, sep="\n")

# Displaying the first few rows of the DataFrame
# head() returns at most the first five rows unless told otherwise; this
# frame has only four rows, so all of them are shown.
first_rows = df.head()
print("First few rows:")
print(first_rows)

# Summary statistics of the DataFrame
# describe() reports count, mean, standard deviation, minimum, the
# 25th/50th/75th percentiles and maximum for the numeric columns
# (only 'Age' here).
summary_stats = df.describe()
print("DataFrame summary statistics:")
print(summary_stats)

# Selecting a column
# Indexing the frame with a column label in square brackets yields that
# single column, which is then printed under a heading.
name_column = df['Name']
print("Selecting a column 'Name':")
print(name_column)

# Adding a new column
# Assigning a list to a label that does not exist yet creates that column;
# the list provides one value for each row of the frame.
gender_values = ['Female', 'Male', 'Male', 'Male']
df['Gender'] = gender_values
print("DataFrame with a new column 'Gender':", df, sep="\n")

# Removing a column
# drop(columns=...) returns a new DataFrame without the named column.
# Rebinding df to that result replaces the earlier axis=1/inplace=True call:
# the effect on df is the same, but the statement no longer mutates the
# object in place and the keyword states which axis is meant.
df = df.drop(columns='Gender')
print("DataFrame with 'Gender' column removed:")
print(df)

# Filtering rows
# The comparison produces one boolean per row; indexing the frame with that
# mask keeps only the rows where it is True (boolean indexing).
is_over_30 = df['Age'] > 30
print("Filtering rows where Age > 30:")
print(df[is_over_30])

# Applying a function to a column
def _double(value):
    """Return ``value`` multiplied by two."""
    return value * 2


# apply() calls the helper once for every element of 'Age'; the resulting
# column is written back over the original, so every age is doubled.
df['Age'] = df['Age'].apply(_double)
print("Applying a function to double the 'Age' column:", df, sep="\n")

# Checking for missing values
# isnull() builds a frame of the same shape holding True wherever a value is
# missing and False elsewhere; that boolean frame is what gets printed.
missing_mask = df.isnull()
print("Checking for missing values:")
print(missing_mask)

# Dropping rows with missing values
# dropna() returns a new DataFrame without the rows that contain a missing
# value. df is rebound to that result rather than using inplace=True.
df = df.dropna()
print("DataFrame after dropping rows with missing values:")
print(df)

# Filling missing values with a specific value
# fillna(0) returns a new DataFrame in which every missing value is replaced
# by 0. Rows with missing values were already removed by dropna() above, so
# this call leaves the data unchanged here; it is kept to show the method.
# The explanation is written as comments: a bare string literal as the cell's
# last expression would be echoed by Jupyter as the cell's result.
df = df.fillna(0)
print("DataFrame after filling missing values with 0:")
print(df)


DataFrame:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
First few rows:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
DataFrame summary statistics:
             Age
count   4.000000
mean   32.500000
std     6.454972
min    25.000000
25%    28.750000
50%    32.500000
75%    36.250000
max    40.000000
Selecting a column 'Name':
0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object
DataFrame with a new column 'Gender':
      Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   35    Male
3    David   40    Male
DataFrame with 'Gender' column removed:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
Filtering rows where Age > 30:
      Name  Age
2  Charlie   35
3    David   40
Applying a function to double the 'Age' column:
      Name  Age
0    Alice   50
1      Bob   60
2  Charlie   70
3    David   80
Checking for missing values:
    Name   

'\nDescription:\n- This code block fills missing values in the DataFrame with a specific value (0 in this case).\nParameters:\n- inplace=True: Modifies the DataFrame in place.\nProcess:\n- Filling missing values with a specific value using the fillna() method.\nReturned Value:\n- None.\n'

In [16]:
import pandas as pd

# Creating the input records: a list holding one dictionary, where each
# dictionary describes a single row (key -> column label, value -> cell).
var = [dict(name="Prakash Shahi", rollNo=22)]

# Creating a DataFrame with one row per record in the list
df = pd.DataFrame(data=var)

# Printing the DataFrame as plain text
print(df)


            name  rollNo
0  Prakash Shahi      22
