In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

In [3]:
type(df)

pandas.core.frame.DataFrame

In [4]:
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [5]:
#Datatypes of all the columns

In [6]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [7]:
#Initial insights about the data

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [9]:
#Above we can see that the Age and Cabin columns have fewer non-null values than the other columns,
#which means they contain many null values that we will handle with imputation

In [10]:
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [11]:
# describe() above summarised only the numeric columns; list every column name for comparison.
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [12]:
#To filter out columns where the datatype is categorical/object

In [13]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [14]:
df.dtypes == "object"

PassengerId    False
Survived       False
Pclass         False
Name            True
Sex             True
Age            False
SibSp          False
Parch          False
Ticket          True
Fare           False
Cabin           True
Embarked        True
dtype: bool

In [15]:
#List of column names 

In [16]:
# df.dtypes is a pandas Series: list-like values, but each one is labelled by an index entry
# (here, the column name).
type(df.dtypes)

pandas.core.series.Series

In [17]:
df.dtypes[df.dtypes == "object"]

Name        object
Sex         object
Ticket      object
Cabin       object
Embarked    object
dtype: object

In [18]:
l = [4,5,6,7,8,9,0, "sudh", "kumar"]

In [19]:
l[0]

4

In [20]:
# A plain list has no boolean-mask indexing: `l == 8` compares the whole list with 8 and
# yields False, and False indexes as 0, so this returns l[0] rather than the element 8.
l[l == 8]

4

In [21]:
col_name = df.dtypes[df.dtypes == "object"].index

In [22]:
col_name

Index(['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], dtype='object')

In [23]:
df[col_name]

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
0,"Braund, Mr. Owen Harris",male,A/5 21171,,S
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,PC 17599,C85,C
2,"Heikkinen, Miss. Laina",female,STON/O2. 3101282,,S
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,113803,C123,S
4,"Allen, Mr. William Henry",male,373450,,S
...,...,...,...,...,...
886,"Montvila, Rev. Juozas",male,211536,,S
887,"Graham, Miss. Margaret Edith",female,112053,B42,S
888,"Johnston, Miss. Catherine Helen ""Carrie""",female,W./C. 6607,,S
889,"Behr, Mr. Karl Howell",male,111369,C148,C


In [24]:
df[col_name].describe()

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
count,891,891,891,204,889
unique,891,2,681,147,3
top,"Andersson, Miss. Erna Alexandra",male,CA. 2343,B96 B98,S
freq,1,577,7,4,644


In [25]:
#Pandas profiling: Get deep insights of data

In [26]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [27]:
#To add a new column in the dataset

In [28]:
df["ineuron"] = "sudh"

In [29]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,ineuron
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,sudh
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,sudh
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,sudh
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,sudh
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,sudh
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,sudh
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,sudh
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,sudh
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,sudh


In [30]:
#To extract single/multiple columns

In [31]:
df["Name"]

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [32]:
#Give first 15 records

In [33]:
df["Name"][0:15]  #Slicing just like in list

0                               Braund, Mr. Owen Harris
1     Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                Heikkinen, Miss. Laina
3          Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                              Allen, Mr. William Henry
5                                      Moran, Mr. James
6                               McCarthy, Mr. Timothy J
7                        Palsson, Master. Gosta Leonard
8     Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
9                   Nasser, Mrs. Nicholas (Adele Achem)
10                      Sandstrom, Miss. Marguerite Rut
11                             Bonnell, Miss. Elizabeth
12                       Saundercock, Mr. William Henry
13                          Andersson, Mr. Anders Johan
14                 Vestrom, Miss. Hulda Amanda Adolfina
Name: Name, dtype: object

In [34]:
#Alternate records

In [35]:
df["Name"][0:15:2]

0                               Braund, Mr. Owen Harris
2                                Heikkinen, Miss. Laina
4                              Allen, Mr. William Henry
6                               McCarthy, Mr. Timothy J
8     Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
10                      Sandstrom, Miss. Marguerite Rut
12                       Saundercock, Mr. William Henry
14                 Vestrom, Miss. Hulda Amanda Adolfina
Name: Name, dtype: object

In [36]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,ineuron
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,sudh
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,sudh
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,sudh
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,sudh
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,sudh
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,sudh
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,sudh
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,sudh
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,sudh


In [37]:
#Check whether the Age column has null values and find their indexes

In [38]:
df["Age"].isnull()

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888     True
889    False
890    False
Name: Age, Length: 891, dtype: bool

In [39]:
# Index labels of the rows whose Age is missing (the boolean mask is used directly).
df.index[df["Age"].isnull()]

Int64Index([  5,  17,  19,  26,  28,  29,  31,  32,  36,  42,
            ...
            832, 837, 839, 846, 849, 859, 863, 868, 878, 888],
           dtype='int64', length=177)

In [40]:
#How many null values do we have in the Age column

In [41]:
# Number of rows with a missing Age: each True in the mask counts as 1.
int(df["Age"].isnull().sum())

177

In [42]:
#Show entire 5th record with null values: Row wise

In [43]:
# Index labels of the rows whose Age is missing, kept for the .loc lookup below.
ind = df.index[df["Age"].isnull()]

In [44]:
df.loc[ind]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,ineuron
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,sudh
17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0000,,S,sudh
19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.2250,,C,sudh
26,27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.2250,,C,sudh
28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q,sudh
...,...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C,sudh
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S,sudh
868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S,sudh
878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S,sudh


In [45]:
df[df["Age"].isnull()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,ineuron
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,sudh
17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0000,,S,sudh
19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.2250,,C,sudh
26,27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.2250,,C,sudh
28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q,sudh
...,...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C,sudh
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S,sudh
868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S,sudh
878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S,sudh


In [46]:
#Try to find out the name of the person who paid the highest fare

In [47]:
df["Fare"]

0       7.2500
1      71.2833
2       7.9250
3      53.1000
4       8.0500
        ...   
886    13.0000
887    30.0000
888    23.4500
889    30.0000
890     7.7500
Name: Fare, Length: 891, dtype: float64

In [48]:
# Highest fare paid. Series.max() is vectorised and skips NaN, whereas the builtin
# max() iterates in Python and gives order-dependent results if NaN is present.
df["Fare"].max()

512.3292

In [49]:
# Names of every passenger who paid the highest fare (ties are all returned).
# A single .loc call selects rows and column together instead of chained indexing,
# and Series.max() replaces the builtin max(), which is not NaN-aware.
df.loc[df["Fare"] == df["Fare"].max(), "Name"]

258                      Ward, Miss. Anna
679    Cardeza, Mr. Thomas Drake Martinez
737                Lesurer, Mr. Gustave J
Name: Name, dtype: object

In [50]:
#Find out how many males and females were on board
#How many survivors do we have
#How many casualties do we have
#What is the name of the eldest person
#How many passengers do we have in first, second and third class
#How many persons do we have whose name starts with S
#Try to create a new column which is the sum of "SibSp" and "Parch"
#How many persons do we have below age 25
#Casualties among people with age less than 40
#From the Cabin column, separate the text and numeric values


In [36]:
#Value_counts method

df["Sex"].value_counts()

male      577
female    314
Name: Sex, dtype: int64

In [39]:
# Count male passengers by summing the boolean mask.
int((df["Sex"] == "male").sum())

577

In [40]:
# Count female passengers by summing the boolean mask.
int((df["Sex"] == "female").sum())

314

In [43]:
df.groupby("Sex")["Sex"].count()

Sex
female    314
male      577
Name: Sex, dtype: int64

In [45]:
df["Survived"].value_counts()

0    549
1    342
Name: Survived, dtype: int64

In [48]:
 len(df[df["Survived"] == 1])

342

In [49]:
 len(df[df["Survived"] == 0])

549

In [53]:
# Name(s) of the oldest passenger: compare every Age with the column maximum,
# then pick the Name column in the same .loc call.
oldest_age = df["Age"].max()
df.loc[df["Age"] == oldest_age, "Name"]

630    Barkworth, Mr. Algernon Henry Wilson
Name: Name, dtype: object

In [54]:
df["Pclass"].value_counts()

3    491
1    216
2    184
Name: Pclass, dtype: int64

In [57]:
df.groupby("Pclass")["Pclass"].count()

Pclass
1    216
2    184
3    491
Name: Pclass, dtype: int64

In [54]:
# Number of passengers whose Name value begins with a capital "S".
int(df["Name"].str.startswith("S").sum())

86

In [55]:
df["New_columnname"] = df["SibSp"] + df["Parch"]

In [56]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,ineuron,New_columnname
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,sudh,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,sudh,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,sudh,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,sudh,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,sudh,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,sudh,0
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,sudh,0
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,sudh,3
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,sudh,0


In [59]:
len(df[df["Age"] < 25])

278

In [62]:
len(df[(df["Survived"] == 0) & (df["Age"] < 40)])

322

In [67]:
import re

# One or more ASCII letters; used both to pull out the cabin letter and to strip it.
replace = re.compile("([a-zA-Z]+)")

# expand=False returns a Series (first match of the single capture group) rather
# than a one-column DataFrame.
df["string_cabin_list"] = df["Cabin"].str.extract(replace, expand=False)
# regex=True is required: with the pandas 2 default (regex=False) a compiled
# pattern is rejected with a ValueError.
df["num_cabin_list"] = df["Cabin"].str.replace(replace, "", regex=True)

In [66]:
df["string_cabin_list"]

0      NaN
1        C
2      NaN
3        C
4      NaN
      ... 
886    NaN
887      B
888    NaN
889      C
890    NaN
Name: string_cabin_list, Length: 891, dtype: object

In [68]:
df["num_cabin_list"]

0      NaN
1       85
2      NaN
3      123
4      NaN
      ... 
886    NaN
887     42
888    NaN
889    148
890    NaN
Name: num_cabin_list, Length: 891, dtype: object

In [69]:
# Strip the letters from Cabin to leave the numeric part. regex=True must be explicit:
# pandas 2 defaults to regex=False, which would treat the pattern as literal text and
# leave every value unchanged.
df["cabin_Number"] = df["Cabin"].str.replace("([A-Za-z]+)", "", regex=True)
# First run of letters in Cabin; expand=False yields a Series instead of a one-column frame.
df["cabin_Letter"] = df["Cabin"].str.extract("([A-Za-z]+)", expand=False)

  df["cabin_Number"] = df["Cabin"].str.replace("([A-Za-z]+)", "")


In [66]:
df[df["Sex"] == "female"].index

Int64Index([  1,   2,   3,   8,   9,  10,  11,  14,  15,  18,
            ...
            866, 871, 874, 875, 879, 880, 882, 885, 887, 888],
           dtype='int64', length=314)

In [71]:
df[df["Sex"] == "male"].index

Int64Index([  0,   4,   5,   6,   7,  12,  13,  16,  17,  20,
            ...
            873, 876, 877, 878, 881, 883, 884, 886, 889, 890],
           dtype='int64', length=577)

In [72]:
df[df["Survived"] == 1].index

Int64Index([  1,   2,   3,   8,   9,  10,  11,  15,  17,  19,
            ...
            865, 866, 869, 871, 874, 875, 879, 880, 887, 889],
           dtype='int64', length=342)

In [73]:
df[df["Survived"] == 0].index

Int64Index([  0,   4,   5,   6,   7,  12,  13,  14,  16,  18,
            ...
            877, 878, 881, 882, 883, 884, 885, 886, 888, 890],
           dtype='int64', length=549)

In [76]:
# Name(s) of the oldest passenger. Age contains NaN, and the builtin max() is not
# NaN-aware (its result depends on where the NaNs sit); Series.max() skips them.
df.loc[df["Age"] == df["Age"].max(), "Name"]

630    Barkworth, Mr. Algernon Henry Wilson
Name: Name, dtype: object

In [81]:
# Oldest recorded age. Series.max() ignores the missing values in Age; the builtin
# max() would return NaN if the first element happened to be NaN.
df["Age"].max()

80.0

In [73]:
# Local path of the semicolon-separated bank CSV. A raw string needs single
# backslashes; doubling them inside r"..." puts two backslashes in the path.
# NOTE(review): this is a machine-specific absolute path — point it at your own copy.
BANK_CSV_PATH = r"C:\Users\Lakshya\Downloads\bank\bank.csv"
df1 = pd.read_csv(BANK_CSV_PATH, sep=";")

In [71]:
#How many campaigns are available in the dataset
#How many users do we have with both housing and personal loans
#How many persons do we have aged 60+
#In which month did we target most of the customers
#Which mode of call is giving more results
#How many entrepreneurs do we have in this list
#How many customers do we have with a negative balance
#Prepare groups of data based on education level
 

In [74]:
df1

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no
4517,57,self-employed,married,tertiary,yes,-3313,yes,yes,unknown,9,may,153,1,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no
4519,28,blue-collar,married,secondary,no,1137,no,no,cellular,6,feb,129,4,211,3,other,no


In [76]:
# Number of distinct campaign values (dropna=False so a missing value would count too,
# as it does with len(unique())).
df1["campaign"].nunique(dropna=False)

32

In [77]:
# Same count via a Python set; a Series is iterable, so no intermediate list is needed.
len(set(df1["campaign"]))

32

In [84]:
len(df1[(df1["housing"] == "yes") & (df1["loan"] == "yes")])

406

In [88]:
# Customers with neither a housing loan nor a personal loan.
no_housing_loan = df1["housing"].ne("yes")
no_personal_loan = df1["loan"].ne("yes")
df2 = df1[no_housing_loan & no_personal_loan]

In [89]:
df2

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
5,35,management,single,tertiary,no,747,no,no,cellular,23,feb,141,2,176,3,failure,no
12,36,technician,married,tertiary,no,1109,no,no,cellular,13,aug,328,2,-1,0,unknown,no
13,20,student,single,secondary,no,502,no,no,cellular,30,apr,261,1,-1,0,unknown,yes
16,56,technician,married,secondary,no,4073,no,no,cellular,27,aug,239,5,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4506,42,unemployed,divorced,tertiary,no,-166,no,no,cellular,29,aug,85,4,-1,0,unknown,no
4509,51,technician,married,tertiary,no,2506,no,no,cellular,30,nov,210,3,-1,0,unknown,no
4513,49,blue-collar,married,secondary,no,322,no,no,cellular,14,aug,356,2,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no


In [92]:
df1["month"].value_counts().max()

1398

In [94]:
# Month label with the largest number of contacts.
df1["month"].value_counts().idxmax()

'may'

In [97]:
df_groupby = df1.groupby(["contact", "y"]).count()

In [102]:
# Count of "yes" outcomes for cellular contacts. The row is a Series labelled by column
# name, so take its first entry by position with .iloc; plain [0] relies on the
# deprecated positional fallback of Series.__getitem__.
df_groupby.loc["cellular", "yes"].iloc[0]

416

In [103]:
# Count of "yes" outcomes for telephone contacts; .iloc[0] reads the first count column
# by position instead of the deprecated integer-key fallback.
df_groupby.loc["telephone", "yes"].iloc[0]

44

In [104]:
# Count of "yes" outcomes for contacts of unknown type; .iloc[0] reads the first count
# column by position instead of the deprecated integer-key fallback.
df_groupby.loc["unknown", "yes"].iloc[0]

61

In [106]:
# Largest number of "yes" outcomes across the three contact modes.
# .iloc[0] takes the first count column by position; integer keys on a label-indexed
# Series ([0]) are deprecated.
max(
    df_groupby.loc[mode, "yes"].iloc[0]
    for mode in ("cellular", "telephone", "unknown")
)

416

In [109]:
df1["job"].value_counts()

management       969
blue-collar      946
technician       768
admin.           478
services         417
retired          230
self-employed    183
entrepreneur     168
unemployed       128
housemaid        112
student           84
unknown           38
Name: job, dtype: int64

In [110]:
len(df1[df1["job"] == "entrepreneur"])

168

In [113]:
df1[df1.balance < 0]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
9,43,services,married,primary,no,-88,yes,yes,cellular,17,apr,313,1,147,2,failure,no
18,25,blue-collar,single,primary,no,-221,yes,no,unknown,23,may,250,1,-1,0,unknown,no
48,32,entrepreneur,single,primary,yes,-849,yes,yes,cellular,4,feb,204,1,-1,0,unknown,no
60,41,blue-collar,married,primary,no,-516,no,yes,telephone,8,jul,554,3,-1,0,unknown,no
92,27,services,single,secondary,no,-195,yes,no,cellular,18,may,391,1,-1,0,unknown,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4474,44,blue-collar,married,primary,no,-237,yes,no,telephone,10,jul,73,1,-1,0,unknown,no
4502,42,services,married,secondary,no,-91,yes,yes,cellular,5,feb,43,1,-1,0,unknown,no
4506,42,unemployed,divorced,tertiary,no,-166,no,no,cellular,29,aug,85,4,-1,0,unknown,no
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no


In [114]:
df1.groupby("education").count()

Unnamed: 0_level_0,age,job,marital,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
education,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
primary,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678
secondary,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306,2306
tertiary,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350
unknown,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187


In [117]:
# One sub-frame per education level, keyed by the level name. Storing them in a dict
# keeps each one reachable; rebinding a loop variable would throw every frame away.
education_frames = {
    level: df1[df1["education"] == level]
    for level in df1["education"].unique()
}

In [118]:
df1["education"].unique()

array(['primary', 'secondary', 'tertiary', 'unknown'], dtype=object)

In [120]:
# List of sub-frames, one per education level, in order of first appearance.
l = [df1[df1["education"] == level] for level in df1["education"].unique()]


In [121]:
len(l)

4

In [123]:
l[0]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
9,43,services,married,primary,no,-88,yes,yes,cellular,17,apr,313,1,147,2,failure,no
18,25,blue-collar,single,primary,no,-221,yes,no,unknown,23,may,250,1,-1,0,unknown,no
26,55,blue-collar,married,primary,no,627,yes,no,unknown,5,may,247,1,-1,0,unknown,no
36,78,retired,divorced,primary,no,229,no,no,telephone,22,oct,97,1,-1,0,unknown,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4480,23,blue-collar,married,primary,no,1158,yes,no,cellular,16,apr,743,1,-1,0,unknown,no
4485,53,blue-collar,married,primary,no,238,yes,no,cellular,11,may,238,2,361,5,failure,no
4486,37,blue-collar,married,primary,no,378,yes,no,unknown,9,may,514,1,-1,0,unknown,no
4499,45,blue-collar,divorced,primary,no,942,no,no,cellular,21,nov,362,1,-1,0,unknown,no


In [124]:
l[1]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
7,39,technician,married,secondary,no,147,yes,no,cellular,6,may,151,2,-1,0,unknown,no
10,39,services,married,secondary,no,9374,yes,no,unknown,20,may,273,1,-1,0,unknown,no
11,43,admin.,married,secondary,no,264,yes,no,cellular,17,apr,113,2,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4514,38,blue-collar,married,secondary,no,1205,yes,no,cellular,20,apr,45,4,153,1,failure,no
4515,32,services,single,secondary,no,473,yes,no,cellular,7,jul,624,5,-1,0,unknown,no
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no


In [125]:
l[2]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
5,35,management,single,tertiary,no,747,no,no,cellular,23,feb,141,2,176,3,failure,no
6,36,self-employed,married,tertiary,no,307,yes,no,cellular,14,may,341,1,330,2,other,no
8,41,entrepreneur,married,tertiary,no,221,yes,no,unknown,14,may,57,2,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4501,34,management,married,tertiary,no,297,yes,no,cellular,26,aug,63,4,-1,0,unknown,no
4506,42,unemployed,divorced,tertiary,no,-166,no,no,cellular,29,aug,85,4,-1,0,unknown,no
4509,51,technician,married,tertiary,no,2506,no,no,cellular,30,nov,210,3,-1,0,unknown,no
4517,57,self-employed,married,tertiary,yes,-3313,yes,yes,unknown,9,may,153,1,-1,0,unknown,no


In [119]:
# Label the numbers 1..50: multiples of both 3 and 5 become "BuzzFizz", multiples of 3
# become "Buzz", multiples of 5 become "Fizz", and everything else stays a number.
l = [
    "BuzzFizz" if n % 15 == 0
    else "Buzz" if n % 3 == 0
    else "Fizz" if n % 5 == 0
    else n
    for n in range(1, 51)
]
print(l)

[1, 2, 'Buzz', 4, 'Fizz', 'Buzz', 7, 8, 'Buzz', 'Fizz', 11, 'Buzz', 13, 14, 'BuzzFizz', 16, 17, 'Buzz', 19, 'Fizz', 'Buzz', 22, 23, 'Buzz', 'Fizz', 26, 'Buzz', 28, 29, 'BuzzFizz', 31, 32, 'Buzz', 34, 'Fizz', 'Buzz', 37, 38, 'Buzz', 'Fizz', 41, 'Buzz', 43, 44, 'BuzzFizz', 46, 47, 'Buzz', 49, 'Fizz']
