## Memory

In [7]:
import numpy as np
import sys

ls = [1,2,3,4]
np_arr = np.array(ls)

print("Memory Consumed by Lists: ", sys.getsizeof(ls), "Bytes")
print("Memory Consumed by NumPy: ", np_arr.itemsize*np_arr.size, "Bytes")

Memory Consumed by Lists:  96 Bytes
Memory Consumed by NumPy:  16 Bytes


## Pointers

- ==: Equality Operator
- is: Identity Operator

In [11]:
ls

[1, 2, 3, 4]

In [12]:
np_arr

array([1, 2, 3, 4])

In [14]:
ls[0] == ls[0]

True

In [15]:
np_arr[0] == np_arr[0]

True

In [16]:
ls[0] is ls[0]

True

In [17]:
np_arr[0] is np_arr[0]

False

## Speed

In [19]:
import time

ls1 = [i for i in range(1000000)]
np_arr1 = np.array(ls1)

t1 = time.time()
ls2 = [i+2 for i in ls1]
t2 = time.time()

print("The total time take by lists operations: ", (t2-t1)*1000, "ms")

t3 = time.time()
np_arr2 = np_arr1+2
t4 = time.time()

print("The total time taken by the NumPy array operations ", (t4-t3)*1000, "ms")

The total time take by lists operations:  285.19225120544434 ms
The total time taken by the NumPy array operations  6.004810333251953 ms


## NumPy Basics

In [20]:
ls = [1,2,3,"sumit", True]

In [21]:
np.array(ls)

array(['1', '2', '3', 'sumit', 'True'], dtype='<U11')

In [22]:
ls1 = [1,2,3,4,5,6]
ar1 = np.array(ls1)

In [23]:
# Rank-1/ 1D Array
ar1

array([1, 2, 3, 4, 5, 6])

In [24]:
# This NumPy array is having 6 elements
# Not having rows and columns
ar1.shape

(6,)

In [28]:
# I can't say that ar1 is having 1 row and 6 column. 
ls3 = [[1,2,3,4,5,6]]
ar3 = np.array(ls3)

In [29]:
ar3

array([[1, 2, 3, 4, 5, 6]])

In [30]:
ar3.shape

(1, 6)

In [32]:
np.array([[1], [2], [3], [4], [5], [6]]).shape

(6, 1)

In [25]:
ls = [[1,2,3], [4,5,6], [7,8,9]]
ar2 = np.array(ls)

In [26]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [27]:
# 2-d array
# with 3 rows and 3 columns
ar2.shape

(3, 3)

In [34]:
## 3-D ARRAY

import cv2
img = cv2.imread('mother_teresa.jpg')
img.shape

(600, 640, 3)

### Inspecting a numpy array

In [37]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
print("Number of Dimensions", ar2.ndim)
print("Shape of array", ar2.shape)
print("Size of the array", ar2.size)
print("Data type of each element of the array", ar2.dtype)
print("Memory consumed by each element of your numpy array", ar2.itemsize, "Bytes")
print("The total memory consumed by numpy array", ar2.nbytes, "Bytes")

Number of Dimensions 2
Shape of array (3, 3)
Size of the array 9
Data type of each element of the array int32
Memory consumed by each element of your numpy array 4 Bytes
The total memory consumed by numpy array 36 Bytes


### Slicing and Indexing Array's

In [41]:
ar1

array([1, 2, 3, 4, 5, 6])

In [42]:
ar1[0]

1

In [43]:
ar1[-1]

6

In [44]:
ar1[0:5]

array([1, 2, 3, 4, 5])

In [45]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [46]:
# ar2[# range of rows , # range of columns]
ar2[0:2, 0:2]

array([[1, 2],
       [4, 5]])

In [47]:
ar2[0::2, 0::2]

array([[1, 3],
       [7, 9]])

In [48]:
ar1

array([1, 2, 3, 4, 5, 6])

In [57]:
ar1[0:1]

array([1])

In [50]:
ar1[::2]
# start from very start
# go upto very end
# extract every altenative. 2nd element

array([1, 3, 5])

In [51]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [52]:
ar2[: , 0]
# All the rows
# COlumns 0

array([1, 4, 7])

In [53]:
ar2[:, :1]
# All the rows
#Col1

array([[1],
       [4],
       [7]])

In [60]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [59]:
ar2[0:1, 1:]
# Row-1
# Start from first column, go upto the last

array([[2, 3]])

In [61]:
ar1

array([1, 2, 3, 4, 5, 6])

In [62]:
ar1[0:3]

array([1, 2, 3])

In [63]:
ar1[0]

1

In [64]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
ar2[0::2, 0::2]

array([[1, 3],
       [7, 9]])

### Functions

In [66]:
# arange
[i for i in range(1,10)]

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [68]:
# NumPy array with an specified range
np.arange(1,10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [69]:
# reshape
np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [71]:
# returns random number between 0,1 in 3,4 shape
np.random.random((3,4))

array([[0.57763308, 0.22512472, 0.72695682, 0.68546517],
       [0.09463381, 0.65127699, 0.98057907, 0.90279751],
       [0.93628013, 0.52370872, 0.5399838 , 0.25326124]])

In [77]:
# return random integer between 1,10
np.random.randint(10)

5

In [78]:
# 10 random number between -inf to +inf
np.random.randn(10)

array([ 0.20114118,  2.07019721,  1.72877045, -1.0589721 , -0.57458311,
       -1.05943584,  0.4961517 ,  1.12851513, -0.51484289, -1.12557376])

In [80]:
# will return n numbers betwee cretain limit
np.linspace(15, 18, 25)
# starting
# ending
# number of elements

array([15.   , 15.125, 15.25 , 15.375, 15.5  , 15.625, 15.75 , 15.875,
       16.   , 16.125, 16.25 , 16.375, 16.5  , 16.625, 16.75 , 16.875,
       17.   , 17.125, 17.25 , 17.375, 17.5  , 17.625, 17.75 , 17.875,
       18.   ])

## Pandas

In [81]:
import pandas as pd

In [84]:
df = pd.read_csv(r"C:\Users\SUMITH\Python Notebooks\Python Notebooks\upGrad MBA\Live Session\Batch-1\telco_churn.csv")

In [83]:
r"C:\Users\SUMITH\Python Notebooks\Python Notebooks\upGrad MBA\Live Session\Batch-1\telco_churn.csv"

'C:\\Users\\SUMITH\\Python Notebooks\\Python Notebooks\\upGrad MBA\\Live Session\\Batch-1\\telco_churn.csv'

In [85]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [88]:
## Extract columns from your data frame
# Col  Name
type(df.customerID)

pandas.core.series.Series

In [90]:
type(df['customerID'])

pandas.core.series.Series

In [91]:
df.customerID

0       7590-VHVEG
1       5575-GNVDE
2       3668-QPYBK
3       7795-CFOCW
4       9237-HQITU
           ...    
7038    6840-RESVB
7039    2234-XADUH
7040    4801-JZAZL
7041    8361-LTMKD
7042    3186-AJIEK
Name: customerID, Length: 7043, dtype: object

### How to extract columns:
- df.colname
- df['colname']

### How to extract the subset out of a dataframe
- indexing
- iloc: index based slicing method
- loc: name based slicing method

In [92]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [97]:
# df[['customerID', 'gender', 'SeniorCitizen']].head(100)
df[['customerID', 'gender', 'SeniorCitizen']][:101]

Unnamed: 0,customerID,gender,SeniorCitizen
0,7590-VHVEG,Female,0
1,5575-GNVDE,Male,0
2,3668-QPYBK,Male,0
3,7795-CFOCW,Male,0
4,9237-HQITU,Female,0
...,...,...,...
96,9803-FTJCG,Male,0
97,0278-YXOOG,Male,0
98,3212-KXOCR,Male,0
99,4598-XLKNJ,Female,1


In [100]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [101]:
# df.iloc[# range of rows: stepsize, # range of columns: stepsize]
df.iloc[0:100:2, 0::2]

Unnamed: 0,customerID,SeniorCitizen,Dependents,PhoneService,InternetService,OnlineBackup,TechSupport,StreamingMovies,PaperlessBilling,MonthlyCharges,Churn
0,7590-VHVEG,0,No,No,DSL,Yes,No,No,Yes,29.85,No
2,3668-QPYBK,0,No,Yes,DSL,Yes,No,No,Yes,53.85,Yes
4,9237-HQITU,0,No,Yes,Fiber optic,No,No,No,Yes,70.7,Yes
6,1452-KIOVK,0,Yes,Yes,Fiber optic,Yes,No,No,Yes,89.1,No
8,7892-POOKP,0,No,Yes,Fiber optic,No,Yes,Yes,Yes,104.8,Yes
10,9763-GRSKD,0,Yes,Yes,DSL,No,No,No,Yes,49.95,No
12,8091-TTVAX,0,No,Yes,Fiber optic,No,No,Yes,No,100.35,No
14,5129-JLPIS,0,No,Yes,Fiber optic,No,Yes,Yes,Yes,105.5,No
16,8191-XWSZG,0,No,Yes,No,No internet service,No internet service,No internet service,No,20.65,No
18,4190-MFLUW,0,Yes,Yes,DSL,No,Yes,No,No,55.2,Yes


In [102]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [103]:
df.iloc[[0,5,7],[2,5] ]

Unnamed: 0,SeniorCitizen,tenure
0,0,1
5,0,8
7,0,10


In [104]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [105]:
df.loc[[0, 4], ["customerID", "gender", "Partner"]]

Unnamed: 0,customerID,gender,Partner
0,7590-VHVEG,Female,Yes
4,9237-HQITU,Female,No


In [107]:
df1 = df.set_index("customerID")
df1.head()

Unnamed: 0_level_0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [108]:
df1.loc[['7590-VHVEG', '9237-HQITU'], ['gender','SeniorCitizen','Partner']]

Unnamed: 0_level_0,gender,SeniorCitizen,Partner
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7590-VHVEG,Female,0,Yes
9237-HQITU,Female,0,No


In [110]:
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [111]:
df.set_index("customerID", inplace = True)
# inplace = True: will make the operation permanent

In [114]:
df.reset_index(inplace = True)

In [115]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## How to filter

In [117]:
df[df['gender']=='Male']

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
6,1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.10,1949.4,No
9,6388-TABGU,Male,0,No,Yes,62,Yes,No,DSL,Yes,...,No,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7033,9767-FFLEM,Male,0,No,No,38,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Credit card (automatic),69.50,2625.25,No
7035,8456-QDAVC,Male,0,No,No,19,Yes,No,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Bank transfer (automatic),78.70,1495.1,No
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [118]:
df[(df['gender']=='Male') & (df['SeniorCitizen']==0)]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
6,1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.10,1949.4,No
9,6388-TABGU,Male,0,No,Yes,62,Yes,No,DSL,Yes,...,No,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0550-DCXLH,Male,0,No,No,13,Yes,No,DSL,No,...,No,Yes,Yes,Yes,Month-to-month,No,Mailed check,73.35,931.55,No
7033,9767-FFLEM,Male,0,No,No,38,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Credit card (automatic),69.50,2625.25,No
7035,8456-QDAVC,Male,0,No,No,19,Yes,No,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Bank transfer (automatic),78.70,1495.1,No
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No


In [126]:
df[     (df['gender']=='Male')     &    (df['Partner']=='No')    |  (df['Dependents'] =='No')]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7035,8456-QDAVC,Male,0,No,No,19,Yes,No,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Bank transfer (automatic),78.70,1495.1,No
7036,7750-EYXWZ,Female,0,No,No,12,No,No phone service,DSL,No,...,Yes,Yes,Yes,Yes,One year,No,Electronic check,60.65,743.3,No
7037,2569-WGERO,Female,0,No,No,72,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,Yes,Bank transfer (automatic),21.15,1419.4,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [127]:
df['gender']=='Male'

0       False
1        True
2        True
3        True
4       False
        ...  
7038     True
7039    False
7040    False
7041     True
7042     True
Name: gender, Length: 7043, dtype: bool

In [132]:
df.loc[(df['gender']=='Male'), ['gender', 'Partner']]

Unnamed: 0,gender,Partner
1,Male,No
2,Male,No
3,Male,No
6,Male,No
9,Male,No
...,...,...
7033,Male,No
7035,Male,No
7038,Male,Yes
7041,Male,Yes


## Sort and Groupby

In [134]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [135]:
df.sort_values("MonthlyCharges",ascending = False )

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
4586,7569-NMZYQ,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,Yes,Bank transfer (automatic),118.75,8672.45,No
2115,8984-HPEMB,Female,0,No,No,71,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,Yes,Electronic check,118.65,8477.6,No
3894,5989-AXPUC,Female,0,Yes,No,68,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,No,Mailed check,118.60,7990.05,No
4804,5734-EJKXG,Female,0,No,No,61,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Electronic check,118.60,7365.7,No
5127,8199-ZLLSA,Male,0,No,No,67,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Bank transfer (automatic),118.35,7804.15,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6906,9945-PSVIP,Female,0,Yes,Yes,25,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,Yes,Mailed check,18.70,383.65,No
1156,0621-CXBKL,Female,0,No,No,53,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,18.70,1005.7,No
6652,0827-ITJPH,Male,0,No,No,36,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,Yes,Credit card (automatic),18.55,689,No
1529,9764-REAFF,Female,0,Yes,No,59,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Bank transfer (automatic),18.40,1057.85,No


In [136]:
df.sort_values(["MonthlyCharges", "TotalCharges"],ascending = [False, True] )

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
4586,7569-NMZYQ,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,Yes,Bank transfer (automatic),118.75,8672.45,No
2115,8984-HPEMB,Female,0,No,No,71,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,Yes,Electronic check,118.65,8477.6,No
4804,5734-EJKXG,Female,0,No,No,61,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Electronic check,118.60,7365.7,No
3894,5989-AXPUC,Female,0,Yes,No,68,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,No,Mailed check,118.60,7990.05,No
5127,8199-ZLLSA,Male,0,No,No,67,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Bank transfer (automatic),118.35,7804.15,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156,0621-CXBKL,Female,0,No,No,53,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,18.70,1005.7,No
6906,9945-PSVIP,Female,0,Yes,Yes,25,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,Yes,Mailed check,18.70,383.65,No
6652,0827-ITJPH,Male,0,No,No,36,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,Yes,Credit card (automatic),18.55,689,No
1529,9764-REAFF,Female,0,Yes,No,59,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Bank transfer (automatic),18.40,1057.85,No


In [137]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [139]:
df.groupby("gender").agg({"MonthlyCharges": 'mean','customerID':'count'})

Unnamed: 0_level_0,MonthlyCharges,customerID
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,65.204243,3488
Male,64.327482,3555


In [141]:
df.groupby("gender")['MonthlyCharges'].mean()

gender
Female    65.204243
Male      64.327482
Name: MonthlyCharges, dtype: float64

In [142]:
pd.pivot_table(df, index = ['gender'], columns = ['Partner'], values = "MonthlyCharges", aggfunc = 'mean')

Partner,No,Yes
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,63.874111,66.62263
Male,60.058854,68.912398
