In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Relative path to the input CSV; it is resolved against the kernel's working directory.
file_path = 'NationalNames.csv'
# Load the whole file once; every function below receives this DataFrame as `data`.
data = pd.read_csv(file_path)

def get_first_8_rows(data):
    """Return the first eight rows of ``data`` (all rows if it has fewer than eight)."""
    leading_rows = data.iloc[:8]
    return leading_rows

def get_last_8_rows(data):
    """Return the last eight rows of ``data`` (all rows if it has fewer than eight)."""
    trailing_rows = data.iloc[-8:]
    return trailing_rows

def get_columns(data):
    """Return the column labels of ``data`` as exposed by its ``columns`` attribute."""
    column_labels = data.columns
    return column_labels

def get_unique_names_count(data):
    """Return how many distinct values appear in the ``Name`` column of ``data``."""
    names = data['Name']
    return names.nunique()

def get_unique_names_per_year(data):
    """Return a Series indexed by ``Year`` holding the number of distinct names in each year."""
    names_by_year = data.groupby('Year')['Name']
    return names_by_year.nunique()

def get_year_with_max_unique_names(unique_names_per_year):
    """Return the index label (the year) at which ``unique_names_per_year`` is largest."""
    peak_year = unique_names_per_year.idxmax()
    return peak_year

def get_most_popular_name_in_max_year(data, year_with_max_unique_names):
    """Return the row with the highest ``Count`` among rows of the given year.

    Args:
        data: DataFrame with at least ``Year`` and ``Count`` columns.
        year_with_max_unique_names: Year value used to select rows.

    Returns:
        The full row (as returned by ``.loc``) whose ``Count`` is the maximum
        within that year.
    """
    in_year = data['Year'] == year_with_max_unique_names
    year_rows = data[in_year]
    # idxmax yields an index label, so the row is looked up by label.
    top_label = year_rows['Count'].idxmax()
    return year_rows.loc[top_label]

def get_jacob_female_year(data):
    """Return the year in which the name 'Jacob' with gender 'F' had its highest ``Count``.

    Returns:
        The ``Year`` value of the row with the largest ``Count`` among rows
        where ``Name == 'Jacob'`` and ``Gender == 'F'``, or ``None`` when no
        such rows exist.
    """
    is_jacob = data['Name'] == 'Jacob'
    is_female = data['Gender'] == 'F'
    matches = data[is_jacob & is_female]

    # Guard first: idxmax cannot be taken on an empty selection.
    if matches.empty:
        return None

    peak_row = matches.loc[matches['Count'].idxmax()]
    return peak_row['Year']

def get_year_with_most_neutral_names(data):
    """Return the year with the largest number of gender-neutral names.

    A name is treated as gender-neutral in a year when its rows for that year
    carry exactly two distinct ``Gender`` values.

    Args:
        data: DataFrame with ``Year``, ``Name`` and ``Gender`` columns.

    Returns:
        The year with the most distinct gender-neutral names, or ``None`` when
        no (Year, Name) pair has two genders.
    """
    # Vectorised replacement for groupby(...).filter(lambda ...): the number of
    # distinct genders per (Year, Name) is broadcast back onto each row, so no
    # Python callback runs per group.
    genders_per_name = data.groupby(['Year', 'Name'])['Gender'].transform('nunique')
    neutral_rows = data[genders_per_name == 2]

    # Mirror get_jacob_female_year: report "no answer" instead of letting
    # idxmax fail on an empty selection.
    if neutral_rows.empty:
        return None

    neutral_counts = neutral_rows.groupby('Year')['Name'].nunique()
    return neutral_counts.idxmax()

def get_total_births_per_year(data):
    """Return a Series indexed by ``Year`` with the sum of ``Count`` for each year."""
    counts_by_year = data.groupby('Year')['Count']
    return counts_by_year.sum()

def get_year_with_most_births(total_births_per_year):
    """Return the index label (the year) at which ``total_births_per_year`` is largest."""
    busiest_year = total_births_per_year.idxmax()
    return busiest_year

def get_births_by_gender(data):
    """Return a table of summed ``Count`` with one row per ``Year`` and one column per ``Gender``."""
    totals = data.groupby(['Year', 'Gender'])['Count'].sum()
    # unstack() moves the innermost index level (Gender) into the columns.
    return totals.unstack()

def get_years_more_girls(births_by_gender):
    """Count the rows of ``births_by_gender`` where the ``F`` value is strictly greater than ``M``."""
    girls = births_by_gender['F']
    boys = births_by_gender['M']
    return girls.gt(boys).sum()

def get_barbara_male_count(data):
    """Return the sum of ``Count`` over rows where ``Name`` is 'Barbara' and ``Gender`` is 'M'."""
    barbara_male = (data['Name'] == 'Barbara') & (data['Gender'] == 'M')
    return data.loc[barbara_male, 'Count'].sum()

def get_num_years(data):
    """Return how many distinct values appear in the ``Year`` column of ``data``."""
    years = data['Year']
    return years.nunique()

def get_popular_neutral_names(data, num_years):
    """Return the names that are gender-neutral in exactly ``num_years`` distinct years.

    A name is treated as gender-neutral in a year when its rows for that year
    carry exactly two distinct ``Gender`` values. Passing the total number of
    years in the dataset selects the names that are neutral in every year.

    Args:
        data: DataFrame with ``Year``, ``Name`` and ``Gender`` columns.
        num_years: Required number of distinct years in which a name is neutral.

    Returns:
        A Series indexed by ``Name`` whose values all equal ``num_years``;
        empty when no name qualifies.
    """
    # Vectorised replacement for groupby(...).filter(lambda ...): the number of
    # distinct genders per (Year, Name) is broadcast back onto each row, so no
    # Python callback runs per group.
    genders_per_name = data.groupby(['Year', 'Name'])['Gender'].transform('nunique')
    neutral_rows = data[genders_per_name == 2]

    years_neutral = neutral_rows.groupby('Name')['Year'].nunique()
    return years_neutral[years_neutral == num_years]

def get_least_popular_name(data):
    """Return the name with the smallest total ``Count`` across all rows.

    Args:
        data: DataFrame with ``Name`` and ``Count`` columns.

    Returns:
        The ``Name`` whose summed ``Count`` is lowest. When several names share
        the minimum, the first one in the grouped (sorted) name order is
        returned.
    """
    total_per_name = data.groupby('Name')['Count'].sum()
    # idxmin, not idxmax: the least popular name is the one with the lowest total.
    return total_per_name.idxmin()

def plot_john_mary_distribution(data):
    """Plot yearly ``Count`` for the name 'John' with gender 'F' and 'Mary' with gender 'M'.

    Draws both series as lines on a single 10x6 figure with a title, axis
    labels, legend and grid, then shows the figure. Returns nothing.
    """
    names = data['Name']
    genders = data['Gender']
    john_f = data[(names == 'John') & (genders == 'F')]
    mary_m = data[(names == 'Mary') & (genders == 'M')]

    # Explicit Axes interface instead of the implicit pyplot state machine.
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(john_f['Year'], john_f['Count'], label='John (жінки)', color='blue')
    ax.plot(mary_m['Year'], mary_m['Count'], label='Mary (чоловіки)', color='green')
    ax.set_title('Розподіл кількості імен John (жінки) та Mary (чоловіки) по роках')
    ax.set_xlabel('Рік')
    ax.set_ylabel('Кількість')
    ax.legend()
    ax.grid()
    plt.show()


In [None]:
# Preview the first eight rows of the loaded dataset.
first_8_rows = get_first_8_rows(data)
first_8_rows


In [9]:
# Preview the last eight rows of the loaded dataset.
last_8_rows = get_last_8_rows(data)
last_8_rows


Unnamed: 0,Id,Name,Year,Gender,Count
1825425,1825426,Zo,2014,M,5
1825426,1825427,Zyeir,2014,M,5
1825427,1825428,Zyel,2014,M,5
1825428,1825429,Zykeem,2014,M,5
1825429,1825430,Zymeer,2014,M,5
1825430,1825431,Zymiere,2014,M,5
1825431,1825432,Zyran,2014,M,5
1825432,1825433,Zyrin,2014,M,5


In [11]:
# List the column labels of the dataset.
columns = get_columns(data)
columns


Index(['Id', 'Name', 'Year', 'Gender', 'Count'], dtype='object')

In [13]:
# Number of distinct names across the whole dataset.
unique_names_count = get_unique_names_count(data)
unique_names_count


93889

In [15]:
# Number of distinct names in each year.
unique_names_per_year = get_unique_names_per_year(data)
unique_names_per_year


Year
1880     1889
1881     1830
1882     2012
1883     1962
1884     2158
        ...  
2010    31603
2011    31418
2012    31236
2013    30774
2014    30579
Name: Name, Length: 135, dtype: int64

In [17]:
# Year with the largest number of distinct names (uses the per-year counts computed above).
year_with_max_unique_names = get_year_with_max_unique_names(unique_names_per_year)
year_with_max_unique_names


np.int64(2008)

In [19]:
# Row with the highest Count within the year that had the most distinct names.
most_popular_name_max_year = get_most_popular_name_in_max_year(data, year_with_max_unique_names)
most_popular_name_max_year


Id        1608303
Name        Jacob
Year         2008
Gender          M
Count       22568
Name: 1608302, dtype: object

In [21]:
# Year in which 'Jacob' recorded with gender 'F' had its highest Count (None if there is no such row).
jacob_female_year = get_jacob_female_year(data)
jacob_female_year


np.int64(2004)

In [39]:
# Year with the most names that appear under two genders in that same year.
year_with_most_neutral = get_year_with_most_neutral_names(data)
year_with_most_neutral


np.int64(2008)

In [41]:
# Sum of Count for each year.
total_births_per_year = get_total_births_per_year(data)
total_births_per_year


Year
1880     201484
1881     192699
1882     221538
1883     216950
1884     243467
         ...   
2010    3686589
2011    3646730
2012    3643336
2013    3626802
2014    3670151
Name: Count, Length: 135, dtype: int64

In [42]:
# Year with the largest summed Count (uses the per-year totals computed above).
year_with_most_births = get_year_with_most_births(total_births_per_year)
year_with_most_births


np.int64(1957)

In [43]:
# Summed Count per year, with one column per gender.
births_by_gender = get_births_by_gender(data)
births_by_gender


Gender,F,M
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1880,90993,110491
1881,91954,100745
1882,107850,113688
1883,112321,104629
1884,129022,114445
...,...,...
2010,1772738,1913851
2011,1753500,1893230
2012,1753922,1889414
2013,1745339,1881463


In [44]:
# Number of years in which the 'F' total is strictly greater than the 'M' total.
years_more_girls = get_years_more_girls(births_by_gender)
years_more_girls


np.int64(54)

In [45]:
# Total Count of rows where the name 'Barbara' is recorded with gender 'M'.
barbara_male_count = get_barbara_male_count(data)
barbara_male_count


np.int64(4139)

In [46]:
# Number of distinct years covered by the dataset.
num_years = get_num_years(data)
num_years


135

In [None]:
# Names that appear under two genders in every one of the num_years years.
popular_neutral_names = get_popular_neutral_names(data, num_years)
popular_neutral_names


In [None]:
# Name selected by get_least_popular_name from the per-name totals of Count.
least_popular_max = get_least_popular_name(data)
least_popular_max


In [None]:
# Draw the yearly counts of 'John' recorded as 'F' and 'Mary' recorded as 'M'.
plot_john_mary_distribution(data)
