In [1]:
import pandas as pd

#### Series objects

In [2]:
# Build a Series from a plain Python list; pandas supplies the default 0..n-1 index.
data = list(range(10, 60, 10))
series = pd.Series(data=data)
print(series)

# Printed result:
#   0    10
#   1    20
#   2    30
#   3    40
#   4    50
#   dtype: int64

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [4]:
# Attach explicit string labels instead of the default integer index.
index_labels = list('ABCDE')
data = [10 * step for step in range(1, 6)]
series = pd.Series(data=data, index=index_labels)
print(series)

# Printed result:
#   A    10
#   B    20
#   C    30
#   D    40
#   E    50
#   dtype: int64

A    10
B    20
C    30
D    40
E    50
dtype: int64


In [5]:
# Using positional-based indexing
# (.iloc takes the integer position of the element, whatever the labels are)
print(series.iloc[0])  # Output: 10
print(series.iloc[2])  # Output: 30

# Using label-based indexing
# (.loc takes the index label of the element)
print(series.loc['A'])  # Output: 10
print(series.loc['C'])  # Output: 30

10
30
10
30


In [6]:
# Using slicing with positional-based indexing
# (the stop position 4 is excluded: positions 1, 2 and 3 are returned)
print(series.iloc[1:4])  
# Output: B    20
#         C    30
#         D    40
#         dtype: int64

# Using slicing with label-based indexing
# (the end label 'D' is included in the result)
print(series.loc['B':'D'])  
# Output: B    20
#         C    30
#         D    40
#         dtype: int64

B    20
C    30
D    40
dtype: int64
B    20
C    30
D    40
dtype: int64


In [7]:
# Selecting several elements with a list of integer positions
# (not a slice: each position is listed explicitly)
print(series.iloc[[1,2,3]])  
# Output: B    20
#         C    30
#         D    40
#         dtype: int64

# Selecting several elements with a list of index labels
print(series.loc[['B', 'C', 'D']])  
# Output: B    20
#         C    30
#         D    40
#         dtype: int64

B    20
C    30
D    40
dtype: int64
B    20
C    30
D    40
dtype: int64


In [10]:
# Using boolean indexing with label-based indexing:
# .loc accepts a boolean Series as a mask.
print(series.loc[series > 20])
# Output: C    30
#         D    40
#         E    50
#         dtype: int64

# Boolean indexing with .iloc requires
# a list-like object without explicit labels!
try:
    print(series.iloc[series > 20])
except (ValueError, NotImplementedError):
    # pandas raises ValueError for a mask carrying a label index and
    # NotImplementedError for a mask carrying an integer index. Catching
    # only these keeps unrelated errors (e.g. a NameError) visible.
    print("This does not work!")
# Error!

# Using boolean indexing with position-based indexing:
# .values strips the index, leaving a plain boolean array that .iloc accepts.
print(series.iloc[(series > 20).values])
# Output: C    30
#         D    40
#         E    50
#         dtype: int64

C    30
D    40
E    50
dtype: int64
This does not work!
C    30
D    40
E    50
dtype: int64


In [12]:
# Element-wise addition of two Series that share the default index 0..2.
series1 = pd.Series([1, 2, 3])
series2 = pd.Series([10, 20, 30])
result = series1.add(series2)
print(result)
# Printed result:
#   0    11
#   1    22
#   2    33
#   dtype: int64

0    11
1    22
2    33
dtype: int64


In [13]:
# Build a Series containing missing entries (None) and replace them with 0
# in a single chained expression.
series = pd.Series(
    [5, 8, None, 3, None, 10],
    index=[1, 2, 3, 4, 5, 6],
).fillna(0)
print(series)

1     5.0
2     8.0
3     0.0
4     3.0
5     0.0
6    10.0
dtype: float64


In [14]:
# Fill any remaining missing values with 0, then cast the values to int.
# The result is stored back in `series`.
series = series.fillna(0).astype(int)
print(series)

1     5
2     8
3     0
4     3
5     0
6    10
dtype: int32


In [17]:
# Build a DataFrame from a dict: every key becomes a column name and
# every list supplies that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'San Francisco', 'Los Angeles'],
)
df = pd.DataFrame(data=data)
display(df)

# Rendered table:
#       Name  Age           City
# 0    Alice   25       New York
# 1      Bob   30  San Francisco
# 2  Charlie   35    Los Angeles

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Los Angeles


In [19]:
# Build a DataFrame from row-oriented data: one inner list per row,
# with the column names passed separately.
data = [
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'San Francisco'],
    ['Charlie', 35, 'Los Angeles'],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age', 'City'])
display(df)

# Rendered table:
#       Name  Age           City
# 0    Alice   25       New York
# 1      Bob   30  San Francisco
# 2  Charlie   35    Los Angeles

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Los Angeles


#### Loading and saving tables

In [20]:
# Write df to a CSV file; index=False leaves the row index out of the file.
df.to_csv('file_name.csv', index=False)

In [21]:
# Write df to an Excel workbook; index=False leaves the row index out of the sheet.
df.to_excel('file_name.xlsx', index=False)

In [22]:
# Read the CSV file back into a DataFrame (rebinds df).
df = pd.read_csv('file_name.csv')

In [23]:
# Read the Excel workbook back into a DataFrame (rebinds df).
df = pd.read_excel('file_name.xlsx')

In [32]:
# Tour of common pd.read_excel options. Every step rebinds `df`, prints a
# label and displays the frame that was read.
filename = "file_name.xlsx"
# Basic usage
df = pd.read_excel(filename)
print('Basic')
display(df)

# Specify sheet name
df = pd.read_excel(filename, sheet_name='Sheet1')
print('Specify sheet name')
display(df)

# Specify header row
# (header=1: the second row of the sheet supplies the column names,
#  so the original header row is not part of the result)
df = pd.read_excel(filename, header=1)
print("Specify header")
display(df)


# Specify columns to read
df = pd.read_excel(filename, usecols=['Name', 'Age'])
print("Specify columns to be read")
display(df)

# Skip rows
# (the first two rows are skipped; the next row is then used as the header)
df = pd.read_excel(filename, skiprows=2)
print("Skip rows")
display(df)

# Specify column data types
df = pd.read_excel(filename, dtype={'Name': str, 'Age': int, 'City' : str})
print("Specify types")
display(df)

# Handling missing values
# (cells equal to one of the na_values strings are read as missing)
# NOTE(review): 'file_name_with_missing.xlsx' is not written by any visible
# cell of this notebook; it has to exist beforehand.
df = pd.read_excel('file_name_with_missing.xlsx', na_values=['N/A', 'NA', 'NaN'])
print("Specify missing values")
display(df)

# Specify index column
df = pd.read_excel(filename, index_col='Name')
print("Specify index columns")
display(df)

# Abbreviate city name
# converters can in general be used
# to gain control over how columns
# are read.
# Here each 'City' value is replaced by the first letters of its words
# (e.g. 'New York' -> 'NY').
df = pd.read_excel(filename, 
                   converters={
                       'City': lambda x : ''.join([word[0] for word in x.split()])
                   })
print("Use converters to abbreviate city")
display(df)

# Read specific range of rows
# (skip one row, use the next row as header, then read two data rows)
df = pd.read_excel(filename, skiprows=1, nrows=2)
print("Read specific range of rows")
display(df)

Basic


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Los Angeles


Specify sheet name


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Los Angeles


Specify header


Unnamed: 0,Alice,25,New York
0,Bob,30,San Francisco
1,Charlie,35,Los Angeles


Specify columns to be read


Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


Skip rows


Unnamed: 0,Bob,30,San Francisco
0,Charlie,35,Los Angeles


Specify types


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Los Angeles


Specify missing values


Unnamed: 0,Name,Age,City
0,,25.0,New York
1,Bob,,San Francisco
2,Charlie,35.0,


Specify index columns


Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Bob,30,San Francisco
Charlie,35,Los Angeles


Use converters to abbreviate city


Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,SF
2,Charlie,35,LA


Read specific range of rows


Unnamed: 0,Alice,25,New York
0,Bob,30,San Francisco
1,Charlie,35,Los Angeles


In [29]:
a = 'New York'
# Abbreviate the city to its initials: first letter of every
# whitespace-separated word. As the last expression of the cell, this is
# what produces the recorded 'NY' output.
''.join([word[0] for word in a.split()])


'NY'

In [34]:
# Read multiple sheets
# Open the workbook once and reuse the handle for every sheet. The `with`
# block closes the underlying file as soon as all sheets are loaded.
with pd.ExcelFile('mult_sheets.xlsx') as xls:
    sheet_names = xls.sheet_names
    # Map each sheet name to the DataFrame read from that sheet.
    dfs = {sheet_name: pd.read_excel(xls, sheet_name) for sheet_name in sheet_names}

for sheet_name, df in dfs.items():
    print(sheet_name)
    display(df)

Employee


Unnamed: 0,Name,City
0,Tim,Putte


Product


Unnamed: 0,Name,Price
0,Laptop,999


#### Looping through a Pandas DataFrame

In [37]:
# Sample table of people used to demonstrate row-wise iteration.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 22, 28, 35],
    City=['New York', 'San Francisco', 'Los Angeles', 'Chicago', 'Seattle'],
)
df = pd.DataFrame(data=data)

# iterrows() yields one (index label, row) pair per row.
for index, row in df.iterrows():
    print(
        f"Index: {index}, Name: {row['Name']}",
        f"The row has type {type(row)}",
        sep="\n",
    )

Index: 0, Name: Alice
The row has type <class 'pandas.core.series.Series'>
Index: 1, Name: Bob
The row has type <class 'pandas.core.series.Series'>
Index: 2, Name: Charlie
The row has type <class 'pandas.core.series.Series'>
Index: 3, Name: David
The row has type <class 'pandas.core.series.Series'>
Index: 4, Name: Eve
The row has type <class 'pandas.core.series.Series'>


#### Indexing DataFrames

In [41]:
# Small frame used by the indexing examples that follow.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 22, 27],
    City=['New York', 'London', 'Paris', 'Tokyo'],
)
df = pd.DataFrame(data=data)

In [40]:
# .iloc[row, column] is purely positional: second row, second column.
value = df.iloc[1, 1]  # Returns 30
print(value)

30


In [42]:
# Move 'Name' into the index so rows can be looked up by name.
# This rebinds df, so the cell is meant to run once per freshly built frame.
df = df.set_index('Name')
value = df.loc['Bob', 'Age']  # Returns 30
print(value)

30


In [44]:
# Select the whole 'Bob' row; the result is a Series indexed by column name.
bob_information = df.loc['Bob', :]
display(bob_information)

Age         30
City    London
Name: Bob, dtype: object

In [49]:
# Create a DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 32, 28]}
df = pd.DataFrame(data)

# Pitfall: a row slice bound to a new name is NOT a reliable view of df.
copy_part = df.loc[0:1]  # Using .loc[] for slicing
# Assigning through copy_part triggers SettingWithCopyWarning and, as the
# printed frame shows, leaves the ages in df unchanged.
copy_part['Age'] = copy_part['Age'] + 1  


print(df)

      Name  Age
0    Alice   25
1      Bob   32
2  Charlie   28


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  copy_part['Age'] = copy_part['Age'] + 1


In [52]:
# Update df itself with a single .loc call (row labels 0..1, column 'Age').
# The change is applied in place, so every re-run of this cell adds 1 again.
df.loc[0:1, 'Age'] += 1
display(df)

Unnamed: 0,Name,Age
0,Alice,27
1,Bob,34
2,Charlie,28


In [53]:
# Create a DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 32, 28]}
df = pd.DataFrame(data)

# Take an explicit, independent copy of the first two rows
copy_part = df.loc[0:1].copy()  # Using .loc[] for slicing
# Modifying the copy does not touch df (see the printed frame)
copy_part['Age'] = copy_part['Age'] + 1  


print(df)

      Name  Age
0    Alice   25
1      Bob   32
2  Charlie   28


#### Universal functions

In [55]:
# Cast the 'Age' column to float and store it back in df.
df['Age'] = df['Age'].astype(float)
print(df)

      Name   Age
0    Alice  25.0
1      Bob  32.0
2  Charlie  28.0


In [68]:
# Two score tables indexed by name; only 'John' appears in both.
data1 = [
    dict(Name='John', Score=85),
    dict(Name='Mary', Score=92),
]
df1 = pd.DataFrame(data1).set_index('Name')

data2 = [
    dict(Name='Tim', Score=78),
    dict(Name='Kasper', Score=88),
    dict(Name='John', Score=93),
]
df2 = pd.DataFrame(data2).set_index('Name')

# Addition aligns rows on the index; a name missing from either frame
# yields NaN in the result.
display(df1.add(df2))

Unnamed: 0_level_0,Score
Name,Unnamed: 1_level_1
John,178.0
Kasper,
Mary,
Tim,


In [69]:
# With fill_value=0 a name missing from one frame counts as 0 instead of producing NaN.
display(df1.add(df2, fill_value = 0))

Unnamed: 0_level_0,Score
Name,Unnamed: 1_level_1
John,178.0
Kasper,88.0
Mary,92.0
Tim,78.0


In [59]:
# IPython help lookup for DataFrame.add (interactive aid, IPython-only syntax).
df1.add?

#### The .apply functionality

In [4]:
# Product price list (prices in euro) for the .apply examples.
df = pd.DataFrame(
    {
        'Product': ['Apple', 'Laptop'],
        'Price': [0.4, 999],
    }
)

In [6]:
def euro_to_usd(price_in_euro, exchange_rate = 1.2):
    """Convert an amount in euro to US dollars.

    Parameters:
        price_in_euro: amount to convert.
        exchange_rate: multiplier applied to the amount (default 1.2).

    Returns:
        ``price_in_euro`` multiplied by ``exchange_rate``.
    """
    price_in_usd = price_in_euro * exchange_rate
    return price_in_usd

# Show the frame, convert every price with euro_to_usd, then show it again.
display(df)
# 'Price' is overwritten with the converted values, so re-running this cell
# applies the conversion a second time.
df['Price'] = df['Price'].apply(euro_to_usd)
display(df)

Unnamed: 0,Product,Price
0,Apple,0.48
1,Laptop,1198.8


Unnamed: 0,Product,Price
0,Apple,0.576
1,Laptop,1438.56


In [8]:
# Apply the inverse of the 1.2 rate to every price, this time with a lambda.
exchange_rate = 1 / 1.2
df['Price'] = df['Price'].apply(lambda x : x * exchange_rate)

#### Working with missing values

In [3]:
# Two numeric columns with gaps (None) for the missing-value examples.
data = dict(
    A=[1, 2, None, 4, 5],
    B=[None, 6, 7, None, 9],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B
0,1.0,
1,2.0,6.0
2,,7.0
3,4.0,
4,5.0,9.0


In [4]:
# Row-wise sum (axis=1). With skipna=False any row containing a missing
# value sums to NaN.
sum_with_nan = df.sum(axis = 1, skipna=False)
display(sum_with_nan)

0     NaN
1     8.0
2     NaN
3     NaN
4    14.0
dtype: float64

In [5]:
# Boolean frame of the same shape as df: True where a value is missing.
mask = df.isnull()
print(mask)

       A      B
0  False   True
1  False  False
2   True  False
3  False   True
4  False  False


In [7]:
# Keep only the rows without any missing value (df itself is not changed).
df_dropped = df.dropna()
display(df_dropped)

Unnamed: 0,A,B
1,2.0,6.0
4,5.0,9.0


In [8]:
# Replace every missing value with 0 in a new frame.
df_filled = df.fillna(0)
display(df_filled)

Unnamed: 0,A,B
0,1.0,0.0
1,2.0,6.0
2,0.0,7.0
3,4.0,0.0
4,5.0,9.0


In [9]:
# Forward fill: each missing value takes the last valid value above it.
# A gap in the first row stays missing because nothing precedes it.
# (.ffill() / .bfill() replace the deprecated fillna(method=...) spelling.)
df_ffilled = df.ffill()
display(df_ffilled)
# Backward fill: each missing value takes the next valid value below it.
df_bfilled = df.bfill()
display(df_bfilled)

Unnamed: 0,A,B
0,1.0,
1,2.0,6.0
2,2.0,7.0
3,4.0,7.0
4,5.0,9.0


Unnamed: 0,A,B
0,1.0,6.0
1,2.0,6.0
2,4.0,7.0
3,4.0,9.0
4,5.0,9.0


#### Tips & Tricks

In [11]:
# Sample employee table.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 32, 28, 23],
    Salary=[60000, 75000, 80000, 45000],
)
df = pd.DataFrame(data=data)

# Method chain wrapped in parentheses (no backslash continuations):
# keep two columns, sort by salary from high to low, renumber the rows.
result = (
    df.filter(items=['Name', 'Salary'])
    .sort_values(by='Salary', ascending=False)
    .reset_index(drop=True)
)
display(result)

Unnamed: 0,Name,Salary
0,Charlie,80000
1,Bob,75000
2,Alice,60000
3,David,45000


In [21]:
def create_top_city_leaderboard(df):
    """Drop rows without a city and sort by sales, modifying ``df`` itself.

    Both steps use ``inplace=True``, so the frame passed in by the caller is
    changed; that same object is returned.
    """
    # Removes the rows whose "city" is missing from the caller's frame.
    df.dropna(subset=["city"], inplace=True)
    # Reorders the caller's frame by descending "sales".
    df.sort_values(by=["sales"], ascending=False, inplace=True)
    return df

def create_top_city_leaderboard_no_inplace(df):
    """Return a leaderboard of cities sorted by sales, without in-place operations.

    Parameters:
        df: DataFrame with a "city" and a "sales" column.

    Returns:
        A new DataFrame holding the rows of ``df`` that have a city, ordered
        by descending "sales". ``df`` itself is left untouched.
    """
    # Each step returns a new frame, so nothing here uses inplace=True and
    # the intermediate result of dropna() is never mutated.
    rows_with_city = df.dropna(subset=["city"])
    return rows_with_city.sort_values(by=["sales"], ascending=False)

def calculate_total_sales(df):
    """Return the sum of the "sales" column of ``df``."""
    sales_column = df["sales"]
    return sales_column.sum()


In [22]:
# Sample data: the last row has no city.
df = pd.DataFrame(
    {
        "city": ["London", "Amsterdam", "New York", None],
        "sales": [100, 300, 200, 400],
    }
)

print(calculate_total_sales(df))
# Output: 1000
print(create_top_city_leaderboard(df))
# Output:
#         city  sales
# 1  Amsterdam    300
# 2   New York    200
# 0     London    100

# Calculate total sales again:
# create_top_city_leaderboard dropped the city-less row from df itself
# (inplace=True), so the total is now different.
print(calculate_total_sales(df))
# Output: 600

1000
        city  sales
1  Amsterdam    300
2   New York    200
0     London    100
600


In [30]:
def create_top_city_leaderboard_better(df):
    """Return the rows of ``df`` that have a city, sorted by descending sales.

    Works on new frames only; the frame passed in is not modified.
    """
    rows_with_city = df.dropna(subset=["city"])
    leaderboard = rows_with_city.sort_values(by=["sales"], ascending=False)
    return leaderboard

# Same sample data as before: the last row has no city.
df = pd.DataFrame(
    {
        "city": ["London", "Amsterdam", "New York", None],
        "sales": [100, 300, 200, 400],
    }
)

print(calculate_total_sales(df))
# Output: 1000
print(create_top_city_leaderboard_better(df))
# Output:
#         city  sales
# 1  Amsterdam    300
# 2   New York    200
# 0     London    100

# Calculate total sales again:
# df was not modified by the leaderboard function, so the total is unchanged.
print(calculate_total_sales(df))
# Output: 1000

1000
        city  sales
1  Amsterdam    300
2   New York    200
0     London    100
1000


#### Understanding .groupby in pandas

In [8]:
# Seven (Id, Val) records with repeated ids.
df = pd.DataFrame(
    {
        'Id': ['a', 'b', 'c', 'a', 'b', 'c', 'c'],
        'Val': [1, 2, 3, 4, 5, 6, 7],
    }
)

# Sum 'Val' per 'Id', then turn the group keys back into a regular column.
grouped = (
    df.groupby('Id')
    .sum()
    .reset_index()
)
display(grouped)

Unnamed: 0,Id,Val
0,a,5
1,b,7
2,c,16


In [9]:
# Revenue records per product and category for the groupby examples.
data = dict(
    Product=['A', 'B', 'A', 'B', 'A', 'B'],
    Category=['X', 'Y', 'Y', 'X', 'X', 'Y'],
    Revenue=[100, 150, 200, 120, 180, 130],
)
sales_data = pd.DataFrame(data=data)

In [10]:
# Group the rows by product.
grouped = sales_data.groupby('Product')
# NOTE(review): despite its name, total_revenue holds the MEAN revenue per
# product (.mean()), not the sum.
total_revenue = grouped['Revenue'].mean()
display(total_revenue)

Product
A    160.000000
B    133.333333
Name: Revenue, dtype: float64

In [13]:
# Grouping by two columns gives a result indexed by (Product, Category).
grouped = sales_data.groupby(['Product', 'Category'])
average_revenue = grouped['Revenue'].mean()
display(average_revenue)

# reset_index() turns the two index levels back into ordinary columns.
average_revenue_no_multiindex = average_revenue.reset_index()
display(average_revenue_no_multiindex)

Product  Category
A        X           140.0
         Y           200.0
B        X           120.0
         Y           140.0
Name: Revenue, dtype: float64

Unnamed: 0,Product,Category,Revenue
0,A,X,140.0
1,A,Y,200.0
2,B,X,120.0
3,B,Y,140.0


In [15]:
# Create a sample DataFrame
data = {
    'Category': ['A', 'B', 'A', 'B', 'A'],
    'Value1': [10, 20, 15, 25, 30],
    'Value2': [5, 10, 8, 12, 15]
}

df = pd.DataFrame(data)

# Group by 'Category' and aggregate 'Value1' with sum and 'Value2' with mean
agg_result = df.groupby('Category')\
                .agg({'Value1': 'sum', 'Value2': 'mean'})

display(agg_result)

Unnamed: 0_level_0,Value1,Value2
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,55,9.333333
B,45,11.0


In [16]:
# Group on the scalar key 'Product' (not the one-element list ['Product']),
# so the key yielded for each group is the plain product label and not a
# 1-tuple.
grouped = sales_data.groupby('Product')
# Iterating a groupby yields (group key, sub-frame) pairs.
for product_name, group in grouped:
    display(group)
    # Do specific operations on each group

Unnamed: 0,Product,Category,Revenue
0,A,X,100
2,A,Y,200
4,A,X,180


Unnamed: 0,Product,Category,Revenue
1,B,Y,150
3,B,X,120
5,B,Y,130


In [22]:
# Creating a MultiIndex with two levels
product = ['A', 'A', 'B', 'B']
category = ['X', 'Y', 'X', 'Y']
multi_index = pd.MultiIndex.from_arrays([product, category])
print(multi_index)

MultiIndex([('A', 'X'),
            ('A', 'Y'),
            ('B', 'X'),
            ('B', 'Y')],
           )


In [25]:
# One revenue figure per (product, category) key of multi_index.
revenue = [160, 190, 120, 140]
df = pd.DataFrame(revenue, index = multi_index, columns = ['Revenue'])
display(df)

Unnamed: 0,Unnamed: 1,Revenue
A,X,160
A,Y,190
B,X,120
B,Y,140


In [26]:
# Create sample data
data = {
    'Year': [2019, 2019, 2020, 2020, 2021, 2021],
    'Quarter': [1, 2, 1, 2, 1, 2],
    'Revenue': [100, 120, 110, 130, 105, 125]
}
df = pd.DataFrame(data)

# Create a MultiIndex DataFrame
multiindex_df = df.set_index(['Year', 'Quarter'])

print(multiindex_df)

              Revenue
Year Quarter         
2019 1            100
     2            120
2020 1            110
     2            130
2021 1            105
     2            125


In [27]:
# Access data using MultiIndex
revenue_2020_q1 = multiindex_df.loc[(2020, 1), 'Revenue']
print(revenue_2020_q1)

110


In [29]:
# Selecting on the first level only returns all quarters of 2020,
# indexed by 'Quarter'.
data_2020 = multiindex_df.loc[2020]
display(data_2020)

Unnamed: 0_level_0,Revenue
Quarter,Unnamed: 1_level_1
1,110
2,130


In [30]:
# Same lookup done one level at a time: year 2020 -> quarter 1 -> 'Revenue'.
multiindex_df.loc[2020].loc[1].loc['Revenue']

110

In [38]:
# Retrieve all data from Quarter 1 using .xs()
quarter1_data = multiindex_df.xs(key=1, level='Quarter')
print(quarter1_data)

      Revenue
Year         
2019      100
2020      110
2021      105


In [39]:
# Get all data from Quarter 1 using boolean indexing
idxs = multiindex_df.index\
                        .get_level_values('Quarter') == 1
quarter1_data = multiindex_df[idxs]
print(quarter1_data)

              Revenue
Year Quarter         
2019 1            100
2020 1            110
2021 1            105


#### Joins

In [40]:
# Employees with a reference to their manager's ID (for the self-join below).
data = dict(
    ID=[1, 2, 3, 4],
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    ManagerID=[3, 3, 4, 4],
)
df = pd.DataFrame(data=data)

In [42]:
# Self-join: match each employee's ManagerID against the ID column of the
# same frame. The suffixes distinguish the employee-side columns from the
# manager-side columns in the result.
result = pd.merge(df, 
                    df, 
                    left_on='ManagerID', 
                    right_on='ID', 
                    suffixes=('_Employee', '_Manager'))

display(result)

Unnamed: 0,ID_Employee,Name_Employee,ManagerID_Employee,ID_Manager,Name_Manager,ManagerID_Manager
0,1,Alice,3,3,Charlie,4
1,2,Bob,3,3,Charlie,4
2,3,Charlie,4,4,David,4
3,4,David,4,4,David,4


In [44]:
# Two small tables that share only ID 3.
data1 = dict(ID=[1, 2, 3], Name=['Alice', 'Bob', 'Charlie'])
data2 = dict(ID=[3, 4], Age=[25, 30])

df1 = pd.DataFrame(data=data1)
df2 = pd.DataFrame(data=data2)

# Right join: every row of df2 is kept; 'Name' is missing where df1 has no match.
result = df1.merge(df2, on='ID', how='right')
display(result)

Unnamed: 0,ID,Name,Age
0,3,Charlie,25
1,4,,30


In [47]:
# Outer join: IDs from both frames are kept; values without a match are missing.
result = pd.merge(df1, df2, on='ID', how='outer')
display(result)

Unnamed: 0,ID,Name,Age
0,1,Alice,
1,2,Bob,
2,3,Charlie,25.0
3,4,,30.0


In [48]:
# Inner join: only IDs present in both frames are kept.
result = pd.merge(df1, df2, on='ID', how='inner')
display(result)

Unnamed: 0,ID,Name,Age
0,3,Charlie,25


In [52]:
# Create employee and project DataFrames
employees = pd.DataFrame({'Employee': ['Alice', 'Bob', 'Charlie']})
projects = pd.DataFrame({'Project': ['Jan', 'Sophia']})

# Add a common key to both DataFrames for the cross join
employees['key'] = 1
projects['key'] = 1

# Perform cross join using merge
meetings = employees.\
                    merge(projects, on='key').\
                    drop(columns='key')

print(meetings)

  Employee Project
0    Alice     Jan
1    Alice  Sophia
2      Bob     Jan
3      Bob  Sophia
4  Charlie     Jan
5  Charlie  Sophia


#### Working with time series

In [58]:
# Parse a compact YYYYMMDD string into a pandas Timestamp.
date_pandas = pd.to_datetime(
    '19930712',
    format='%Y%m%d',
)
print(date_pandas)

1993-07-12 00:00:00


In [61]:
# Format the timestamp as "<weekday name>, <month name> <day>, <year>".
verbose_date_str = date_pandas.strftime('%A, %B %d, %Y')
print(verbose_date_str)

Monday, July 12, 1993


In [62]:
# IPython help lookup for DataFrame.shift (interactive aid, IPython-only syntax).
df.shift?

#### Vectorized string operations

In [64]:
# Series of strings used by the vectorized .str examples below.
names = pd.Series(['Alice', 'Bob', 'Charlie', 'David', 'Eve'])

In [66]:
# Convert all names to uppercase
names_upper = names.str.upper()
# Result (a Series, not printed here): ALICE, BOB, CHARLIE, DAVID, EVE

# Check if a name contains a specific substring
contains_charlie = names.str.contains('Charlie')
print(contains_charlie)
# Output: boolean Series -> False, False, True, False, False

# Extract the first two characters from each name
first_two_chars = names.str[:2]
print(first_two_chars)
# Output: Series of strings -> Al, Bo, Ch, Da, Ev

0    False
1    False
2     True
3    False
4    False
dtype: bool
0    Al
1    Bo
2    Ch
3    Da
4    Ev
dtype: object


In [68]:
# Names with matching e-mail addresses.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Email=[
        'alice@example.com',
        'bob@gmail.com',
        'charlie@example.com',
        'david@yahoo.com',
        'eve@example.com',
    ],
)
df = pd.DataFrame(data=data)

# Domain = the part after '@': split each address and take element 1.
df['Domain'] = df['Email'].str.split('@').str[1]

# Flag the names whose first letter is 'A'.
df['StartsWithA'] = df['Name'].str.startswith('A')

display(df)

Unnamed: 0,Name,Email,Domain,StartsWithA
0,Alice,alice@example.com,example.com,True
1,Bob,bob@gmail.com,gmail.com,False
2,Charlie,charlie@example.com,example.com,False
3,David,david@yahoo.com,yahoo.com,False
4,Eve,eve@example.com,example.com,False
