In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Steps
1. [Data Cleaning](#1)
1. [Understanding The Data](#2)
  1. [Columns Descriptions](#2.0)
  1. [July](#2.1)
    1. [How many transactions happened in July](#2.1.1)
    1. [How many customers made transactions in July](#2.1.2)
    1. [Which category were the most transactions in July](#2.1.3)
    1. [Which category has the highest expenditure amount in July](#2.1.4)
    1. [Who is the customer with the highest number of transactions in July](#2.1.5)
    1. [Who is the customer with the highest transaction volume in July](#2.1.6)
    1. [Credito and Debito comparison in July](#2.1.7)
    1. [Comparison of installment transactions in July](#2.1.8)
    1. [How many customer paid to Netflix in July](#2.1.9)
    1. [In July, how many people used UBER and how much total payment was made to the UBER](#2.1.10)
    1. [In July, End-of-month operating balances for each customer](#2.1.11)

  1. [August](#2.2)
    1. [How many transactions happened in August](#2.2.1)
    1. [How many customers made transactions in August](#2.2.2)
    1. [Which category were the most transactions in August](#2.2.3)
    1. [Which category has the highest expenditure amount in August](#2.2.4)
    1. [Who is the customer with the highest number of transactions in August](#2.2.5)
    1. [Who is the customer with the highest transaction volume in August](#2.2.6)
    1. [Credito and Debito comparison in August](#2.2.7)
    1. [Comparison of installment transactions in August](#2.2.8)
    1. [How many customer paid to Netflix in August](#2.2.9)
    1. [In August, how many people used UBER and how much total payment was made to the UBER](#2.2.10)
    1. [In August, End-of-month operating balances for each customer](#2.2.11)

  1. [September](#2.3)
    1. [How many transactions happened in September](#2.3.1)
    1. [How many customers made transactions in September](#2.3.2)
    1. [Which category were the most transactions in September](#2.3.3)
    1. [Which category has the highest expenditure amount in September](#2.3.4)
    1. [Who is the customer with the highest number of transactions in September](#2.3.5)
    1. [Who is the customer with the highest transaction volume in September](#2.3.6)
    1. [Credito and Debito comparison in September](#2.3.7)
    1. [Comparison of installment transactions in September](#2.3.8)
    1. [How many customer paid to Netflix in September](#2.3.9)
    1. [In September, how many people used UBER and how much total payment was made to the UBER](#2.3.10)
    1. [In September, End-of-month operating balances for each customer](#2.3.11)

  1. [October](#2.4)
    1. [How many transactions happened in October](#2.4.1)
    1. [How many customers made transactions in October](#2.4.2)
    1. [Which category were the most transactions in October](#2.4.3)
    1. [Which category has the highest expenditure amount in October](#2.4.4)
    1. [Who is the customer with the highest number of transactions in October](#2.4.5)
    1. [Who is the customer with the highest transaction volume in October](#2.4.6)
    1. [Credito and Debito comparison in October](#2.4.7)
    1. [Comparison of installment transactions in October](#2.4.8)
    1. [How many customer paid to Netflix in October](#2.4.9)
    1. [In October, how many people used UBER and how much total payment was made to the UBER](#2.4.10)
    1. [In October, End-of-month operating balances for each customer](#2.4.11)
1. [Comparison of Data Sets](#3)
  1. [Comparison of monthly transactions number](#3.1)
  1. [How many customers made transactions each month](#3.2)
  1. [Compare the category number of transactions](#3.3)
  1. [Comparison of monthly transaction amounts by categories](#3.4)
  1. [Comparison of customers transaction numbers](#3.5)
  1. [Comparison of the customer with the highest transaction volume](#3.6)
  1. [Comparison of installment transactions](#3.7)
  1. [Comparison of Number of customers who made Netflix transactions](#3.8)
  1. [Comparison of the total amounts spent on Netflix](#3.9)
  1. [Comparison of Number of customers who made UBER transactions](#3.10)
  1. [Comparison of the total amounts spent on UBER](#3.11)
  1. [Comparison of customers month-end balance](#3.12)
1. [Data Analysis](#4)
  1. [Week 1](#4.1)
    1. [Netflix](#4.1.1)
        1. [How many customer paid to Netflix in October](#4.1.1.1)
        1. [How much was paid on Netflix in October?](#4.1.1.2)
        1. [Comparison of payment channels made to Netflix in October](#4.1.1.3)
        1. [Customers paying multiple times to Netflix in October](#4.1.1.4)
        1. [Comparing customers who made 2 or more payments to Netflix by months](#4.1.1.5)

<a id="1"></a> <br>
# Data Cleaning
<font color='green'>
At this stage, I clean and reshape the data so that it is ready for exploration and visualization.
</font>



In [None]:
def _read_month(csv_path):
    """Load one monthly export, using the file's first column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


july = _read_month('../input/brazil-bank-account-spending-dataset/july.csv')
august = _read_month('../input/brazil-bank-account-spending-dataset/august.csv')
september = _read_month('../input/brazil-bank-account-spending-dataset/september.csv')
october = _read_month('../input/brazil-bank-account-spending-dataset/october.csv')

In [None]:

# ===========================================================
#                 July - preparation
# ===========================================================

# Reference only: a column layout for the July frame, kept as an inert
# string literal. It is never executed, so the frame's columns are untouched.
'''
july.columns = ['eventId', 'originId', 'eventCreateDate', 'originSystem', 'operation',
       'isChargeback', 'operationType', 'operationCode', 'originCreateDate',
       'userId', 'accountId', 'accountType', 'receipt', 'accountId',
       'accountType', 'cooperativeId', 'accountName', 'amount', 'installment',
       'installmentAmount', 'originalCurrencyAmount', 'originalCurrency',
       'description', 'category', 'customTransaction', 'flagIncomeTax',
       'hashtag', 'costType', 'description.value', 'description.date',
       'category.value', 'category.name', 'category.imageCode',
       'category.date', 'hashtag.value', 'hashtag.date', 'originUserId',
       'destinyUserId', 'storeName', 'cardMCC']
'''

# One chained step:
#   * new_amount      -> absolute value of `amount` (new column)
#   * eventCreateDate -> parsed into datetimes (replaces the existing column)
july = july.assign(
    new_amount=july['amount'].abs(),
    eventCreateDate=pd.to_datetime(july['eventCreateDate']),
)

In [None]:
# ===========================================================
#                 August - preparation
# ===========================================================

# Reference only: a column layout for the August frame, kept as an inert
# string literal. It is never executed, so the frame's columns are untouched.
'''
august.columns = ['eventId', 'originId', 'eventCreateDate', 'originSystem', 'operation',
       'isChargeback', 'operationType', 'operationCode', 'originCreateDate',
       'userId', 'accountId', 'accountType', 'receipt', 'accountId',
       'accountType', 'cooperativeId', 'accountName', 'amount', 'installment',
       'installmentAmount', 'originalCurrencyAmount', 'originalCurrency',
       'description', 'category', 'customTransaction', 'flagIncomeTax',
       'hashtag', 'costType', 'description.value', 'description.date',
       'category.value', 'category.name', 'category.imageCode',
       'category.date', 'hashtag.value', 'hashtag.date', 'originUserId',
       'destinyUserId', 'storeName', 'cardMCC']
'''

# One chained step:
#   * new_amount      -> absolute value of `amount` (new column)
#   * eventCreateDate -> parsed into datetimes (replaces the existing column)
august = august.assign(
    new_amount=august['amount'].abs(),
    eventCreateDate=pd.to_datetime(august['eventCreateDate']),
)

In [None]:
# ===========================================================
#                 September - preparation
# ===========================================================

# Reference only: a column layout for the September frame, kept as an inert
# string literal. It is never executed, so the frame's columns are untouched.
'''
september.columns = ['eventId', 'originId', 'eventCreateDate', 'originSystem', 'operation',
       'isChargeback', 'operationType', 'operationCode', 'originCreateDate',
       'userId', 'accountId', 'accountType', 'receipt', 'accountId',
       'accountType', 'cooperativeId', 'accountName', 'amount', 'installment',
       'installmentAmount', 'originalCurrencyAmount', 'originalCurrency',
       'description', 'category', 'customTransaction', 'flagIncomeTax',
       'hashtag', 'costType', 'description.value', 'description.date',
       'category.value', 'category.name', 'category.imageCode',
       'category.date', 'hashtag.value', 'hashtag.date', 'originUserId',
       'destinyUserId', 'storeName', 'cardMCC']
'''

# One chained step:
#   * new_amount      -> absolute value of `amount` (new column)
#   * eventCreateDate -> parsed into datetimes (replaces the existing column)
september = september.assign(
    new_amount=september['amount'].abs(),
    eventCreateDate=pd.to_datetime(september['eventCreateDate']),
)

In [None]:
# ===========================================================
#                 October - preparation
# ===========================================================

# Reference only: a column layout for the October frame, kept as an inert
# string literal. It is never executed, so the frame's columns are untouched.
'''
october.columns = ['eventId', 'originId', 'eventCreateDate', 'originSystem', 'operation',
       'isChargeback', 'operationType', 'operationCode', 'originCreateDate',
       'userId', 'accountId', 'accountType', 'receipt', 'accountId',
       'accountType', 'cooperativeId', 'accountName', 'amount', 'installment',
       'installmentAmount', 'originalCurrencyAmount', 'originalCurrency',
       'description', 'category', 'customTransaction', 'flagIncomeTax',
       'hashtag', 'costType', 'description.value', 'description.date',
       'category.value', 'category.name', 'category.imageCode',
       'category.date', 'hashtag.value', 'hashtag.date', 'originUserId',
       'destinyUserId', 'storeName', 'cardMCC']
'''

# One chained step:
#   * new_amount      -> absolute value of `amount` (new column)
#   * eventCreateDate -> parsed into datetimes (replaces the existing column)
october = october.assign(
    new_amount=october['amount'].abs(),
    eventCreateDate=pd.to_datetime(october['eventCreateDate']),
)

In [None]:
# Stack the four monthly frames on top of each other, then discard every
# column that contains no values at all.
# NOTE(review): each month's own row labels are kept, so labels may repeat
# across months in `df`.
monthly_frames = [july, august, september, october]
df = pd.concat(monthly_frames).dropna(how='all', axis=1)

In [None]:
# Disabled draft: everything between the triple quotes is a plain string
# literal, so none of it runs. The draft walks the July rows one by one,
# appends 1 or 0 to `expenses_july` depending on `operation` and
# `accountType`, and stores the list in july['expenses'].
# NOTE(review): as the only expression in the cell, the string is what the
# cell evaluates to.
'''


# July

expenses_july = []
for i in range(len(july)):

  if july.iloc[i]['operation'] == 'Debito':
    expenses_july.append(1)

  elif july.iloc[i]['operation'] == 'Credito' and july.iloc[i]['accountType'] == 'ContaCorrente':
      expenses_july.append(1)
  elif july.iloc[i]['operation'] == 'Credito' and july.iloc[i]['accountType'] == 'CartaoDeCredito':
      expenses_july.append(1)
  elif july.iloc[i]['operation'] == 'Credito' and july.iloc[i]['accountType'] == 'Poupanca':
      expenses_july.append(0)
  elif july.iloc[i]['operation'] == 'Credito' and july.iloc[i]['accountType'] == 'ContaCapital':
      expenses_july.append(0)
  elif july.iloc[i]['operation'] == 'Credito' and july.iloc[i]['accountType'] == 'ValeRefeicao':
      expenses_july.append(1)

  else:
    expenses_july.append(0)

july['expenses'] = expenses_july


'''

In [None]:
# Disabled draft: everything between the triple quotes is a plain string
# literal, so none of it runs. The draft walks the August rows one by one,
# appends 1 or 0 to `expenses_august` depending on `operation` and
# `accountType`, and stores the list in august['expenses'].
# NOTE(review): as the only expression in the cell, the string is what the
# cell evaluates to.
'''


# August

expenses_august = []
for i in range(len(august)):

  if august.iloc[i]['operation'] == 'Debito':
    expenses_august.append(1)

  elif august.iloc[i]['operation'] == 'Credito' and august.iloc[i]['accountType'] == 'ContaCorrente':
      expenses_august.append(1)
  elif august.iloc[i]['operation'] == 'Credito' and august.iloc[i]['accountType'] == 'CartaoDeCredito':
      expenses_august.append(1)
  elif august.iloc[i]['operation'] == 'Credito' and august.iloc[i]['accountType'] == 'Poupanca':
      expenses_august.append(0)
  elif august.iloc[i]['operation'] == 'Credito' and august.iloc[i]['accountType'] == 'ContaCapital':
      expenses_august.append(0)
  elif august.iloc[i]['operation'] == 'Credito' and august.iloc[i]['accountType'] == 'ValeRefeicao':
      expenses_august.append(1)

  else:
    expenses_august.append(0)

august['expenses'] = expenses_august


'''

In [None]:
# Disabled draft: everything between the triple quotes is a plain string
# literal, so none of it runs. The draft walks the September rows one by one,
# appends 1 or 0 to `expenses_september` depending on `operation` and
# `accountType`, and stores the list in september['expenses'].
# NOTE(review): as the only expression in the cell, the string is what the
# cell evaluates to.
'''


# September

expenses_september = []
for i in range(len(september)):

  if september.iloc[i]['operation'] == 'Debito':
    expenses_september.append(1)

  elif september.iloc[i]['operation'] == 'Credito' and september.iloc[i]['accountType'] == 'ContaCorrente':
      expenses_september.append(1)
  elif september.iloc[i]['operation'] == 'Credito' and september.iloc[i]['accountType'] == 'CartaoDeCredito':
      expenses_september.append(1)
  elif september.iloc[i]['operation'] == 'Credito' and september.iloc[i]['accountType'] == 'Poupanca':
      expenses_september.append(0)
  elif september.iloc[i]['operation'] == 'Credito' and september.iloc[i]['accountType'] == 'ContaCapital':
      expenses_september.append(0)
  elif september.iloc[i]['operation'] == 'Credito' and september.iloc[i]['accountType'] == 'ValeRefeicao':
      expenses_september.append(1)

  else:
    expenses_september.append(0)

september['expenses'] = expenses_september


'''

In [None]:
# Disabled draft: everything between the triple quotes is a plain string
# literal, so none of it runs. The draft walks the October rows one by one,
# appends 1 or 0 to `expenses_october` depending on `operation` and
# `accountType`, and stores the list in october['expenses'].
# NOTE(review): as the only expression in the cell, the string is what the
# cell evaluates to.
'''


# October

expenses_october = []
for i in range(len(october)):

  if october.iloc[i]['operation'] == 'Debito':
    expenses_october.append(1)

  elif october.iloc[i]['operation'] == 'Credito' and october.iloc[i]['accountType'] == 'ContaCorrente':
      expenses_october.append(1)
  elif october.iloc[i]['operation'] == 'Credito' and october.iloc[i]['accountType'] == 'CartaoDeCredito':
      expenses_october.append(1)
  elif october.iloc[i]['operation'] == 'Credito' and october.iloc[i]['accountType'] == 'Poupanca':
      expenses_october.append(0)
  elif october.iloc[i]['operation'] == 'Credito' and october.iloc[i]['accountType'] == 'ContaCapital':
      expenses_october.append(0)
  elif october.iloc[i]['operation'] == 'Credito' and october.iloc[i]['accountType'] == 'ValeRefeicao':
      expenses_october.append(1)

  else:
    expenses_october.append(0)

october['expenses'] = expenses_october


'''

In [None]:
# Disabled draft: everything between the triple quotes is a plain string
# literal, so none of it runs. For each month the draft creates a
# '<month>_expense_amount' column filled with the text 'nan' and then loops
# over users and rows, looking up each user's summed `new_amount` over rows
# where `expenses == 1`.
# NOTE(review): the innermost statement of every loop uses `==` (a comparison
# whose result is discarded), not `=`, so even if re-enabled the draft would
# leave the new column at 'nan'. It also relies on the `expenses` column that
# only the other disabled drafts create.
'''

# July

july['july_expense_amount'] = 'nan'


for i in july[july['expenses'] == 1].groupby('userId').sum()[['new_amount']].index:
  for k in range(len(july)):
    if july.iloc[k]['userId'] == i:
      july['july_expense_amount'].iloc[k] == july[july['expenses'] == 1].groupby('userId').sum()[['new_amount']].iloc[i].values[0]

#August

august['august_expense_amount'] = 'nan'


for i in august[august['expenses'] == 1].groupby('userId').sum()[['new_amount']].index:
  for k in range(len(august)):
    if august.iloc[k]['userId'] == i:
      august['august_expense_amount'].iloc[k] == august[august['expenses'] == 1].groupby('userId').sum()[['new_amount']].iloc[i].values[0]

# September

september['september_expense_amount'] = 'nan'


for i in september[september['expenses'] == 1].groupby('userId').sum()[['new_amount']].index:
  for k in range(len(september)):
    if september.iloc[k]['userId'] == i:
      september['september_expense_amount'].iloc[k] == september[september['expenses'] == 1].groupby('userId').sum()[['new_amount']].iloc[i].values[0]

# October

october['october_expense_amount'] = 'nan'


for i in october[october['expenses'] == 1].groupby('userId').sum()[['new_amount']].index:
  for k in range(len(october)):
    if october.iloc[k]['userId'] == i:
      october['october_expense_amount'].iloc[k] == october[october['expenses'] == 1].groupby('userId').sum()[['new_amount']].iloc[i].values[0]


'''

<a id="2"></a> <br>
# Understanding The Data

<font color='green'>
At this stage, I answer the questions I defined in order to analyze the data.
</font>

<a id="2.0"></a> <br>

## Columns Descriptions

### **[*EN*]**
* ‘originId’, A type of internal ID not necessary for the analysis we’re doing **[we could discard this one]**

* ‘originSystem’, which system the info is coming from, it will always be CORE  **[we could discard this one]**

* ‘operationType’, type of account - it will be one of the following values:
  * ‘principal’ = main account since operations always happen in this account **[we could discard this one]**
  
  * Tarifa = fee (usually financial fees)
  * IOF = Interest for financial operations (usually 3% of the transaction when it applies)

* ‘account.cooperativeId’, this is also another type of internal ID **[we could discard this one]**

* ‘receipt’, it’s a receipt number **[we could discard this one]**

* ‘account.accountId’, an internal ID **[we could discard this one]**

* ‘isChargeback’, it shows whether the user is charged repeatedly (for example when it pays in installments)

* ‘operationCode’, it’s the umbrella for the categories - it shows whether the transaction is an income or a purchase.

* ‘accountType’ 
  * conta corrente = checkings account 
  * poupança = savings account
  * Contacorrente = checkings account

* ‘editable.category.imageCode’, this is the category the user chooses when making an expense (apparently there is an image he/she chooses); it ranges from savings to rent, transfers, others, etc. These are predefined categories - it would be nice to list all of them.

* ‘editable.hashtag.value’,  this is the user description if he wants to add more info on the transaction

* ‘movimentation.cardMCC’, Merchant Category Code, the range of categories that businesses are separated into [airlines, services, restaurants, etc.]. This needs further exploration because, although MCCs are standard, each financial institution has its own set of categories.

* ‘storeName’, the ID name for the establishment where the transaction is being made



---



### **Category names [*PT → EN*]**


* ‘AplicativoTransporte’ = transportation app like uber
* ‘TelefoneTvInternet’ = telephone/ tv / internet bills
* ‘EletronicosMoveis’ = electronics and furniture, 
* ‘Restaurantes’ = restaurants
* ‘Outros’ = other 
* ‘Cinemas’ = cinema 
* ‘CafesPadarias’ = coffee places/bakeries
* ‘Conveniencia’ = convenience store
* ‘Mercado’ = supermarket 
* ‘Saques’ = withdrawals
* ‘OutrosTarifasTaxasFinanceiras’ = other interest or financial fees you need to pay such as account maintenance 
* ‘Assinaturas’ = subscriptions 
* ‘OutrosMoradia’ = other housing expenses
* ‘Farmacia’ = pharmacy 
* ‘OutrosLazerDiversao’ = other entertainment expenses
* ‘TransportePublico’ = public transportation
* ‘MedicosDentistas’ = doctors/dentists
* ‘Hospedagem’ = hotels 
* ‘FaculdadePos’ = university
* ‘CuidadosPessoais’ = personal care
* ‘Manutencao’ = maintenance 
* ‘OutrosVestuario’ = other apparel/clothing
* ‘Aluguel’ = rent
* ‘Cursos’ = courses 
* ‘Materiais’ = materials 
* ‘PagamentoCredito’ = credit/loan payment
* ‘Estacionamento’ = parking
* ‘Tarifas’ = fees 
* ‘Luz’ = electricity
* ‘RecargaCelular’ = phone credit/airtime
* ‘OutrosTransporte’ = other transportation expenses
* ‘AguaCondominio’ = water bill+building maintenance
* ‘OutrosEducacao’ = other educational expenses 
* ‘LivrosRevistas’ = books/magazines
* ‘Presentes’ = gifts 
* ‘Combustivel’ = gasoline 
* ‘Viagem’ = trips
* ‘Trocas’ = exchanges (translation was missing in the original notes — TODO confirm)
* ‘Acessorios’ = accessories


---

**These categories are not considered expenses**

* ‘Aplicacao’ = savings 
* ‘ReceitasNegocio’ = income received for your business
* ‘Rendimentos’ = interest gained from investments/savings 
* ‘ContratacaoCredito’ = credit/loan acquired
* ‘OutrosRenda’ = other income
* ‘DepositoMesmoCpf’ = transfers to this account from the same person (same ID number) but from a different bank account
* ‘TransferenciaMesmoCpf’ = transfer to another account with the same ID number
* ‘Resgate’ = money you transfer from your savings to your checking account to use


<a id="2.1"></a> <br>
## July

In [None]:
july.head(5)

In [None]:
july.info()

In [None]:
july.describe().T

### EN
1. How many transactions happened in July
1. How many customers made transactions in July
1. Which category were the most transactions in July
1. Which category has the highest expenditure amount in July
1. Who is the customer with the highest number of transactions in July
1. Who is the customer with the highest transaction volume in July
1. Credito and Debito comparison in July
1. Comparison of installment transactions in July
1. How many customer paid to Netflix in July
1. In July, how many people used UBER and how much total payment was made to the UBER
1. In July, End-of-month operating balances for each customer



---

### TR
1. Temmuz ayında kaç işlem olmuş
1. Temmuz ayında kaç müşteri işlem yapmış
1. Temmuz ayında en fazla hangi kategoride işlem olmuş
1. Temmuz ayında hangi kategorinin harcama tutarı en yüksektir
1. Temmuz ayında toplamda en fazla işlem adedi olan müşteri kim
1. Temmuz ayında toplamda en fazla işlem hacmi olan müşteri kim
1. Temmuz ayında Credito ve Debito karşılaştırılması
1. Temmuz ayında taksitli işlemlerin karşılaştırılması
1. Temmuz ayında Netflix'e kaç müşteri ödeme yaptı
1. Temmuz ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı
1. Temmuz ayında her müşterinin ay sonu bakiyesi



In [None]:
# Report, for every column of the July frame, how many distinct values it
# holds (a missing value counts as one distinct value).

print('====    JULY COLUMNS UNIQUE VALUES    ====\n')
for column_name in july.columns:
    distinct_count = july[column_name].nunique(dropna=False)
    print('Unique elements of', column_name, ' - ', distinct_count)

<a id="2.1.1"></a> <br>
### **QUESTION 1**
### [*EN*]

### How many transactions happened in July

### [*TR*]
### Temmuz ayında kaç işlem olmuş

In [None]:
# Show every July row that shares its ('new_amount', 'eventCreateDate',
# 'userId') combination with at least one other row; keep=False marks all
# members of each repeated group, not just the later ones.
repeated_mask = july.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep=False)
july.loc[repeated_mask]

In [None]:
# Rows that repeat the same ('new_amount', 'eventCreateDate', 'userId')
# combination are treated as one transaction recorded more than once, so only
# the last occurrence of each combination is kept. Counting them separately
# would inflate the answers below.
#
# The rows are removed with a boolean mask rather than by index label:
# dropping by label would also delete any unrelated row that happens to share
# a label with a duplicate if the index is not unique.
is_repeat = july.duplicated(['new_amount', 'eventCreateDate', 'userId'], keep='last')
drop_index = list(july[is_repeat].index)  # labels of the removed rows, kept for inspection
dropped_july = july[~is_repeat].reset_index(drop=True)
dropped_july.shape

In [None]:
# Number of rows left in July after de-duplication
print(f'There have been {len(dropped_july)} transactions in July.')

<a id="2.1.2"></a> <br>
### **QUESTION 2**
### [*EN*]

### How many customers made transactions in July

### [*TR*]
### Temmuz ayında kaç müşteri işlem yapmış

In [None]:
# Number of distinct (non-null) userId values in the raw July rows
july_customer_count = july['userId'].nunique()
print(july_customer_count, 'customer made a transaction in July')

<a id="2.1.3"></a> <br>
### **QUESTION 3**
### [*EN*]

### Which category were the most transactions in July

### [*TR*]
### Temmuz ayında en fazla hangi kategoride işlem olmuş

In [None]:
# Count the de-duplicated July rows per category (rows with a missing
# eventCreateDate are not counted) and list the busiest categories first.
july_categ_freq = (
    dropped_july
    .groupby('category')['eventCreateDate']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='freq')
)
july_categ_freq

In [None]:
# Bar chart of the number of de-duplicated July transactions per category,
# with bars ordered from the most to the least frequent category.
# The ordering is derived from the same frame that is plotted (dropped_july);
# taking it from the raw `july` frame could rank bars by counts that still
# include the duplicated rows.
plt.figure(figsize=(20,15))
category_order = dropped_july['category'].value_counts().index
ax = sns.countplot(x="category", data=dropped_july, palette="Blues_d", order=category_order)
plt.xlabel('Categories')
plt.ylabel('Total Transaction')
plt.title('Total Transactions By Category')
plt.xticks(rotation= 90);

<a id="2.1.4"></a> <br>
### **QUESTION 4**
### [*EN*]

### Which category has the highest expenditure amount in July

### [*TR*]
### Temmuz ayında hangi kategorinin harcama tutarı en yüksektir

In [None]:
# Spending per category in July: rows whose category is listed in
# `not_expenses_categ` are left out, and only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito', 'OutrosRenda', 'DepositoMesmoCpf','TransferenciaMesmoCpf','Resgate']
is_expense_category = ~dropped_july['category'].isin(not_expenses_categ)
is_debit = dropped_july['operation'] == 'Debito'
july_categ_amount = (
    dropped_july.loc[is_expense_category & is_debit]
    .groupby('category')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
    .rename(columns={'new_amount': 'total_amount'})
)
july_categ_amount

In [None]:
# Bar chart of the total July spending per category, in the order of
# `july_categ_amount`.
plt.figure(figsize=(20, 15))
g = sns.barplot(data=july_categ_amount, x="category", y="total_amount")
g.set_xlabel('Categories')
g.set_ylabel('Total Amount')
g.set_title('Total Amount by Categories')
plt.xticks(rotation=90);

<a id="2.1.5"></a> <br>
### **QUESTION 5**
### [*EN*]

### Who is the customer with the highest number of transactions in July

### [*TR*]
### Temmuz ayında toplamda en fazla işlem adedi olan müşteri kim

In [None]:
# Number of de-duplicated July rows per customer, most active customer first.
july_customer_freq = (
    dropped_july
    .groupby('userId')[['eventCreateDate']]
    .count()
    .sort_values('eventCreateDate', ascending=False)
    .reset_index()
    .rename(columns={'userId': 'customer_id', 'eventCreateDate': 'customer_freq'})
)
july_customer_freq

<a id="2.1.6"></a> <br>
### **QUESTION 6**
### [*EN*]

### Who is the customer with the highest transaction volume in July

### [*TR*]
### Temmuz ayında toplamda en fazla işlem hacmi olan müşteri kim

In [None]:
# To calculate the spending amount, we remove the categories in the 'not_exspenses_categ' list from our data set. We also only look at transactions that are 'Debito'

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito', 'OutrosRenda', 'DepositoMesmoCpf','TransferenciaMesmoCpf','Resgate']
july_customer_amount = dropped_july[(~dropped_july['category'].isin(not_expenses_categ)) & (dropped_july['operation'] == 'Debito')]
july_customer_amount = july_customer_amount.groupby('userId')[['new_amount']].sum().sort_values('new_amount',ascending=False).reset_index()
july_customer_amount

In [None]:
# All de-duplicated July rows for userId 71.
# NOTE(review): the ID is hard-coded; presumably it was read off the ranking
# above — confirm it still matches after re-running.
dropped_july[dropped_july['userId'] == 71]

In [None]:
# All de-duplicated July rows for userId 2389.
# NOTE(review): the ID is hard-coded; presumably it was read off the ranking
# above — confirm it still matches after re-running.
dropped_july[dropped_july['userId'] == 2389]

<a id="2.1.7"></a> <br>
### **QUESTION 7**
### [*EN*]

### Credito and Debito comparison in July

### [*TR*]
### Temmuz ayında Credito ve Debito karşılaştırılması

In [None]:
# Number of raw July rows per `operation` value, as a two-column table.
# NOTE(review): this uses `july`, not the de-duplicated `dropped_july`.
operation_counts = july['operation'].value_counts()
july_credit_debit = operation_counts.reset_index()
july_credit_debit.columns = ['operation', 'freq']
july_credit_debit

In [None]:
# Bar chart of the number of raw July rows per `operation` value.
plt.figure(figsize=(12,9))
ax = sns.countplot(x="operation", data=july, palette="ch:2.5,-.2,dark=.3")
plt.xlabel('Operation Type')
plt.ylabel('Total Transaction')
# The trailing semicolon keeps the Text object returned by plt.title out of
# the cell output, matching the other plotting cells.
plt.title('Comparison of Credito and Debito');

<a id="2.1.8"></a> <br>
### **QUESTION 8**
### [*EN*]

### Comparison of installment transactions in July

### [*TR*]
### Temmuz ayında taksitli işlemlerin karşılaştırılması

In [None]:
# Number of raw July rows per `installment` value, as a two-column table.
installment_counts = july['installment'].value_counts()
july_installment_freq = installment_counts.reset_index()
july_installment_freq.columns = ['number_of_installments', 'freq']
july_installment_freq

In [None]:
# Frequency of each installment count in July. The y-axis (the frequency) is
# drawn on a logarithmic scale — presumably so that rarely used installment
# counts stay visible next to the common ones.

plt.figure(figsize=(15, 10))
g = sns.barplot(data=july_installment_freq, x="number_of_installments", y="freq")
g.set_xlabel('Number of Installments')
g.set_ylabel('Total Installments Count')
g.set_title('Comparison of Installment Counts')
g.set_yscale("log")

In [None]:
july[july['installment'] == 10]

In [None]:
july[july['installment'] == 12]

In [None]:
july[july['installment'] == 10]['storeName'].value_counts()

In [None]:
july[july['installment'] == 12]['storeName'].value_counts()

<a id="2.1.9"></a> <br>
### **QUESTION 9**
### [*EN*]

### How many customer paid to Netflix in July

### [*TR*]
### Temmuz ayında Netflix'e kaç müşteri ödeme yaptı


In [None]:
# Netflix transactions in July

dropped_july[dropped_july['storeName'] == 'NETFLIX.COM']

In [None]:
# Distinct customers with at least one 'NETFLIX.COM' row in de-duplicated July

july_netflix_rows = dropped_july.loc[dropped_july['storeName'] == 'NETFLIX.COM']
print(july_netflix_rows['userId'].nunique(), 'customers made netflix transaction in July')

In [None]:
# Sum of `new_amount` over the 'NETFLIX.COM' rows of de-duplicated July
july_netflix_total = dropped_july.loc[dropped_july['storeName'] == 'NETFLIX.COM', 'new_amount'].sum()
print('A total of', july_netflix_total, 'spent on Netflix in July')

<a id="2.1.10"></a> <br>
### **QUESTION 10**
### [*EN*]

### In July, how many people used UBER and how much total payment was made to the UBER

### [*TR*]
### Temmuz ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı

In [None]:
# UBER transactions in July

dropped_july[dropped_july['storeName'] == 'Uber Do Brasil Tecnologia']

In [None]:
# Distinct customers with at least one Uber row in de-duplicated July.
# `july_uber` holds those rows so they can be reused afterwards.

is_uber = dropped_july['storeName'] == 'Uber Do Brasil Tecnologia'
july_uber = dropped_july.loc[is_uber]
print(july_uber['userId'].nunique(), 'customers made UBER transaction in July')

In [None]:
# Sum of `new_amount` over the July Uber rows

july_uber_total = july_uber['new_amount'].sum()
print('A total of', july_uber_total, 'spent on UBER in July')

<a id="2.1.11"></a> <br>
### **QUESTION 11**
### [*EN*]

### In July, End-of-month operating balances for each customer

### [*TR*]
### Temmuz ayında her müşterinin ay sonu bakiyesi

TransferenciaEntreContas
> Hesaplar Arası Transfer [TR]

> Transfer Between Accounts [EN]

In [None]:
# Net signed `amount` per customer for July, leaving out rows whose
# operationCode is 'TransferenciaEntreContas' (transfers between accounts).
# NOTE(review): this is the sum of the month's movements only; it does not
# include any balance carried over from earlier months — confirm that this is
# what "end-of-month balance" is meant to be.
is_internal_transfer = dropped_july['operationCode'] == 'TransferenciaEntreContas'
july_eom = (
    dropped_july[~is_internal_transfer]
    .groupby('userId')[['amount']]
    .sum()  # method form: passing the builtin `sum` to .agg() is deprecated in recent pandas
    .sort_index(ascending=True)
)
july_eom.columns = ['eom_balance']
july_eom

<a id="2.2"></a> <br>
## August

In [None]:
august.head(5)

In [None]:
august.info()

In [None]:
august.describe().T

### EN
1. How many transactions happened in August
1. How many customers made transactions in August
1. Which category were the most transactions in August
1. Which category has the highest expenditure amount in August
1. Who is the customer with the highest number of transactions in August
1. Who is the customer with the highest transaction volume in August
1. Credito and Debito comparison in August
1. Comparison of installment transactions in August
1. How many customer paid to Netflix in August
1. In August, how many people used UBER and how much total payment was made to the UBER
1. In August, End-of-month operating balances for each customer



---

### TR
1. Ağustos ayında kaç işlem olmuş
1. Ağustos ayında kaç müşteri işlem yapmış
1. Ağustos ayında en fazla hangi kategoride işlem olmuş
1. Ağustos ayında hangi kategorinin harcama tutarı en yüksektir
1. Ağustos ayında toplamda en fazla işlem adedi olan müşteri kim
1. Ağustos ayında toplamda en fazla işlem hacmi olan müşteri kim
1. Ağustos ayında Credito ve Debito karşılaştırılması
1. Ağustos ayında taksitli işlemlerin karşılaştırılması
1. Ağustos ayında Netflix'e kaç müşteri ödeme yaptı
1. Ağustos ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı
1. Ağustos ayında her müşterinin ay sonu bakiyesi



In [None]:
# Report, for every column of the August frame, how many distinct values it
# holds (a missing value counts as one distinct value).

print('====    AUGUST COLUMNS UNIQUE VALUES    ====\n')
for column_name in august.columns:
    distinct_count = august[column_name].nunique(dropna=False)
    print('Unique elements of', column_name, ' - ', distinct_count)

<a id="2.2.1"></a> <br>
### **QUESTION 1**
### [*EN*]

### How many transactions happened in August

### [*TR*]
### Ağustos ayında kaç işlem olmuş

In [None]:
# Show every row whose ('new_amount', 'eventCreateDate', 'userId') combination occurs more than once
august.loc[august.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep=False)]

In [None]:
# Rows that share 'new_amount', 'eventCreateDate' and 'userId' are treated as one transaction that
# was recorded more than once. Only the last occurrence is kept so later counts are not inflated.

drop_index = august.index[august.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep='last')].tolist()
dropped_august = august.drop(index=drop_index).reset_index(drop=True)
dropped_august.shape

In [None]:
# Row count of the de-duplicated August frame
print('There have been', dropped_august.shape[0], 'transactions in August.')

<a id="2.2.2"></a> <br>
### **QUESTION 2**
### [*EN*]

### How many customers made transactions in August

### [*TR*]
### Ağustos ayında kaç müşteri işlem yapmış

In [None]:
# Distinct customers in August. `nunique()` counts the distinct userIds directly instead of
# building a grouped count over every column only to take its length.
print(august['userId'].nunique(),'customer made a transaction in August')

<a id="2.2.3"></a> <br>
### **QUESTION 3**
### [*EN*]

### Which category were the most transactions in August

### [*TR*]
### Ağustos ayında en fazla hangi kategoride işlem olmuş

In [None]:
# Transactions per category in August (de-duplicated rows), most frequent first
august_categ_freq = (
    dropped_august.groupby('category')['eventCreateDate']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='freq')
)
august_categ_freq

In [None]:
plt.figure(figsize=(20,15))
# The bar order must come from the same de-duplicated frame that is plotted; ordering by the raw
# `august` counts can leave the bars out of descending order once duplicates are removed.
ax = sns.countplot(x="category", data=dropped_august, palette="Blues_d",
                   order=dropped_august['category'].value_counts().index)
plt.xlabel('Categories')
plt.ylabel('Total Transaction')
plt.title('Total Transactions By Category')
plt.xticks(rotation= 90);

<a id="2.2.4"></a> <br>
### **QUESTION 4**
### [*EN*]

### Which category has the highest expenditure amount in August

### [*TR*]
### Ağustos ayında hangi kategorinin harcama tutarı en yüksektir

In [None]:
# Spending per category in August: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
august_categ_amount = (
    dropped_august
    .loc[~dropped_august['category'].isin(not_expenses_categ) & dropped_august['operation'].eq('Debito')]
    .groupby('category')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
    .rename(columns={'new_amount': 'total_amount'})
)
august_categ_amount

In [None]:
# Bar chart of the August spending totals, one bar per category
plt.figure(figsize=(20, 15))
g = sns.barplot(data=august_categ_amount, x="category", y="total_amount")
g.set(xlabel='Categories', ylabel='Total Amount', title='Total Amount by Categories')
plt.xticks(rotation=90);

<a id="2.2.5"></a> <br>
### **QUESTION 5**
### [*EN*]

### Who is the customer with the highest number of transactions in August

### [*TR*]
### Ağustos ayında toplamda en fazla işlem adedi olan müşteri kim

In [None]:
# Transactions per customer in August (de-duplicated rows), busiest customer first
august_customer_freq = (
    dropped_august.groupby('userId')[['eventCreateDate']]
    .count()
    .sort_values('eventCreateDate', ascending=False)
    .reset_index()
    .rename(columns={'userId': 'customer_id', 'eventCreateDate': 'customer_freq'})
)
august_customer_freq

<a id="2.2.6"></a> <br>
### **QUESTION 6**
### [*EN*]

### Who is the customer with the highest transaction volume in August

### [*TR*]
### Ağustos ayında toplamda en fazla işlem hacmi olan müşteri kim

In [None]:
# Spending per customer in August: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
august_customer_amount = (
    dropped_august
    .loc[~dropped_august['category'].isin(not_expenses_categ) & dropped_august['operation'].eq('Debito')]
    .groupby('userId')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
)
august_customer_amount

In [None]:
# All August rows of customer 1101.
# NOTE(review): the id is hard-coded; presumably it was read off the table above - confirm after re-running.
dropped_august.loc[dropped_august['userId'].eq(1101)]

<a id="2.2.7"></a> <br>
### **QUESTION 7**
### [*EN*]

### Credito and Debito comparison in August

### [*TR*]
### Ağustos ayında Credito ve Debito karşılaştırılması

In [None]:
# Number of rows per operation type in August.
# NOTE(review): this counts the raw `august` frame, so duplicated rows are included - confirm that is intended.
august_credit_debit = (
    august['operation']
    .value_counts()
    .rename_axis('operation')
    .reset_index(name='freq')
)
august_credit_debit

In [None]:
# Count of rows per operation type in August.
# NOTE(review): plotted from the raw `august` frame, so duplicated rows are included - confirm that is intended.
plt.figure(figsize=(12,9))
ax = sns.countplot(x="operation", data=august, palette="ch:2.5,-.2,dark=.3")
plt.xlabel('Operation Type')
plt.ylabel('Total Transaction')
# Trailing semicolon keeps the Text(...) repr of the title out of the cell output
plt.title('Comparison of Credito and Debito');

<a id="2.2.8"></a> <br>
### **QUESTION 8**
### [*EN*]

### Comparison of installment transactions in August

### [*TR*]
### Ağustos ayında taksitli işlemlerin karşılaştırılması

In [None]:
# Number of August rows for each value of 'installment'
august_installment_freq = (
    august['installment']
    .value_counts()
    .rename_axis('number_of_installments')
    .reset_index(name='freq')
)
august_installment_freq

In [None]:
# Frequency of each installment count in August, drawn on a logarithmic y-axis

plt.figure(figsize=(15, 10))
g = sns.barplot(data=august_installment_freq, x="number_of_installments", y="freq")
g.set(xlabel='Number of Installments', ylabel='Total Installments Count',
      title='Comparison of Installment Counts')
g.set_yscale("log")

<a id="2.2.9"></a> <br>
### **QUESTION 9**
### [*EN*]

### How many customers paid Netflix in August

### [*TR*]
### Ağustos ayında Netflix'e kaç müşteri ödeme yaptı


In [None]:
# Netflix transactions in August

dropped_august.loc[dropped_august['storeName'].eq('NETFLIX.COM')]

In [None]:
# Number of distinct customers with at least one Netflix transaction in August.
# `nunique()` on userId replaces the group-count-then-len detour and gives the same number.

print(dropped_august.loc[dropped_august['storeName'] == 'NETFLIX.COM', 'userId'].nunique(),'customers made netflix transaction in August')

In [None]:
# Sum of 'new_amount' over the August rows whose storeName is NETFLIX.COM
print('A total of', dropped_august.loc[dropped_august['storeName'] == 'NETFLIX.COM', 'new_amount'].sum(), 'spent on Netflix in August')

<a id="2.2.10"></a> <br>
### **QUESTION 10**
### [*EN*]

### In August, how many people used UBER and how much total payment was made to the UBER

### [*TR*]
### Ağustos ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı

In [None]:
# August rows whose storeName is 'Uber Do Brasil Tecnologia'

dropped_august.loc[dropped_august['storeName'].eq('Uber Do Brasil Tecnologia')]

In [None]:
# Number of distinct customers with at least one UBER transaction in August.
# `august_uber` is kept as a named frame because the next cell sums its amounts.

august_uber = dropped_august[dropped_august['storeName'] == 'Uber Do Brasil Tecnologia']
# nunique() counts distinct userIds directly instead of grouping and taking the length
print(august_uber['userId'].nunique(),'customers made UBER transaction in August')

In [None]:
# Sum of 'new_amount' over the August UBER rows selected in the previous cell

print('A total of', august_uber.loc[:, 'new_amount'].sum(), 'spent on UBER in August')

<a id="2.2.11"></a> <br>
### **QUESTION 11**
### [*EN*]

### In August, End-of-month operating balances for each customer

### [*TR*]
### Ağustos ayında her müşterinin ay sonu bakiyesi

In [None]:
# Sum of 'amount' per customer for August, leaving out rows whose operationCode is
# 'TransferenciaEntreContas' (transfer between accounts).
# `.sum()` replaces `.agg(sum)`: handing the Python builtin to `agg` is deprecated in recent pandas
# and emits a FutureWarning, while the result is the same.
august_eom = (
    dropped_august[dropped_august['operationCode'] != 'TransferenciaEntreContas']
    .groupby('userId')[['amount']]
    .sum()
    .sort_index(ascending=True)
)
august_eom.columns = ['eom_balance']
august_eom

<a id="2.3"></a> <br>
## September

In [None]:
# Preview of the first five September rows
september.head(n=5)

In [None]:
# Summary of the September frame (columns, dtypes, non-null counts) as printed by DataFrame.info
september.info()

In [None]:
# Descriptive statistics of the September frame, transposed so each described column becomes a row
september.describe().transpose()

### EN
1. How many transactions happened in September
1. How many customers made transactions in September
1. Which category were the most transactions in September
1. Which category has the highest expenditure amount in September
1. Who is the customer with the highest number of transactions in September
1. Who is the customer with the highest transaction volume in September
1. Credito and Debito comparison in September
1. Comparison of installment transactions in September
1. How many customers paid Netflix in September
1. In September, how many people used UBER and how much total payment was made to the UBER
1. In September, End-of-month operating balances for each customer



---

### TR
1. Eylül ayında kaç işlem olmuş
1. Eylül ayında kaç müşteri işlem yapmış
1. Eylül ayında en fazla hangi kategoride işlem olmuş
1. Eylül ayında hangi kategorinin harcama tutarı en yüksektir
1. Eylül ayında toplamda en fazla işlem adedi olan müşteri kim
1. Eylül ayında toplamda en fazla işlem hacmi olan müşteri kim
1. Eylül ayında Credito ve Debito karşılaştırılması
1. Eylül ayında taksitli işlemlerin karşılaştırılması
1. Eylül ayında Netflix'e kaç müşteri ödeme yaptı
1. Eylül ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı
1. Eylül ayında her müşterinin ay sonu bakiyesi



In [None]:
# Number of distinct values in every column of the September dataset
# (Series.unique keeps NaN, so a missing value counts as one distinct value)

print('====    SEPTEMBER COLUMNS UNIQUE VALUES    ====\n')
for column_name in september.columns:
    distinct_count = len(september[column_name].unique())
    print(f'Unique elements of {column_name}  -  {distinct_count}')

<a id="2.3.1"></a> <br>
### **QUESTION 1**
### [*EN*]

### How many transactions happened in September

### [*TR*]
### Eylül ayında kaç işlem olmuş

In [None]:
# Show every row whose ('new_amount', 'eventCreateDate', 'userId') combination occurs more than once
september.loc[september.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep=False)]

In [None]:
# Rows that share 'new_amount', 'eventCreateDate' and 'userId' are treated as one transaction that
# was recorded more than once. Only the last occurrence is kept so later counts are not inflated.

drop_index = september.index[september.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep='last')].tolist()
dropped_september = september.drop(index=drop_index).reset_index(drop=True)
dropped_september.shape

In [None]:
# Row count of the de-duplicated September frame
print('There have been', dropped_september.shape[0], 'transactions in September.')

<a id="2.3.2"></a> <br>
### **QUESTION 2**
### [*EN*]

### How many customers made transactions in September

### [*TR*]
### Eylül ayında kaç müşteri işlem yapmış

In [None]:
# Distinct customers in September. `nunique()` counts the distinct userIds directly instead of
# building a grouped count over every column only to take its length.
print(september['userId'].nunique(),'customer made a transaction in September')

<a id="2.3.3"></a> <br>
### **QUESTION 3**
### [*EN*]

### Which category were the most transactions in September

### [*TR*]
### Eylül ayında en fazla hangi kategoride işlem olmuş

In [None]:
# Transactions per category in September (de-duplicated rows), most frequent first
september_categ_freq = (
    dropped_september.groupby('category')['eventCreateDate']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='freq')
)
september_categ_freq

In [None]:
plt.figure(figsize=(20,15))
# The bar order must come from the same de-duplicated frame that is plotted; ordering by the raw
# `september` counts can leave the bars out of descending order once duplicates are removed.
ax = sns.countplot(x="category", data=dropped_september, palette="Blues_d",
                   order=dropped_september['category'].value_counts().index)
plt.xlabel('Categories')
plt.ylabel('Total Transaction')
plt.title('Total Transactions By Category')
plt.xticks(rotation= 90);

<a id="2.3.4"></a> <br>
### **QUESTION 4**
### [*EN*]

### Which category has the highest expenditure amount in September

### [*TR*]
### Eylül ayında hangi kategorinin harcama tutarı en yüksektir

In [None]:
# Spending per category in September: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
september_categ_amount = (
    dropped_september
    .loc[~dropped_september['category'].isin(not_expenses_categ) & dropped_september['operation'].eq('Debito')]
    .groupby('category')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
    .rename(columns={'new_amount': 'total_amount'})
)
september_categ_amount

In [None]:
# Bar chart of the September spending totals, one bar per category
plt.figure(figsize=(20, 15))
g = sns.barplot(data=september_categ_amount, x="category", y="total_amount")
g.set(xlabel='Categories', ylabel='Total Amount', title='Total Amount by Categories')
plt.xticks(rotation=90);

<a id="2.3.5"></a> <br>
### **QUESTION 5**
### [*EN*]

### Who is the customer with the highest number of transactions in September

### [*TR*]
### Eylül ayında toplamda en fazla işlem adedi olan müşteri kim

In [None]:
# Transactions per customer in September (de-duplicated rows), busiest customer first
september_customer_freq = (
    dropped_september.groupby('userId')[['eventCreateDate']]
    .count()
    .sort_values('eventCreateDate', ascending=False)
    .reset_index()
    .rename(columns={'userId': 'customer_id', 'eventCreateDate': 'customer_freq'})
)
september_customer_freq

<a id="2.3.6"></a> <br>
### **QUESTION 6**
### [*EN*]

### Who is the customer with the highest transaction volume in September

### [*TR*]
### Eylül ayında toplamda en fazla işlem hacmi olan müşteri kim

In [None]:
# Spending per customer in September: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
september_customer_amount = (
    dropped_september
    .loc[~dropped_september['category'].isin(not_expenses_categ) & dropped_september['operation'].eq('Debito')]
    .groupby('userId')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
)
september_customer_amount

In [None]:
# All September rows of customer 71.
# NOTE(review): the id is hard-coded; presumably it was read off the table above - confirm after re-running.
dropped_september.loc[dropped_september['userId'].eq(71)]

<a id="2.3.7"></a> <br>
### **QUESTION 7**
### [*EN*]

### Credito and Debito comparison in September

### [*TR*]
### Eylül ayında Credito ve Debito karşılaştırılması

In [None]:
# Number of rows per operation type in September.
# NOTE(review): this counts the raw `september` frame, so duplicated rows are included - confirm that is intended.
september_credit_debit = (
    september['operation']
    .value_counts()
    .rename_axis('operation')
    .reset_index(name='freq')
)
september_credit_debit

In [None]:
# Count of rows per operation type in September.
# NOTE(review): plotted from the raw `september` frame, so duplicated rows are included - confirm that is intended.
plt.figure(figsize=(12,9))
ax = sns.countplot(x="operation", data=september, palette="ch:2.5,-.2,dark=.3")
plt.xlabel('Operation Type')
plt.ylabel('Total Transaction')
# Trailing semicolon keeps the Text(...) repr of the title out of the cell output
plt.title('Comparison of Credito and Debito');

<a id="2.3.8"></a> <br>
### **QUESTION 8**
### [*EN*]

### Comparison of installment transactions in September

### [*TR*]
### Eylül ayında taksitli işlemlerin karşılaştırılması

In [None]:
# Number of September rows for each value of 'installment'
september_installment_freq = (
    september['installment']
    .value_counts()
    .rename_axis('number_of_installments')
    .reset_index(name='freq')
)
september_installment_freq

In [None]:
# Frequency of each installment count in September, drawn on a logarithmic y-axis

plt.figure(figsize=(15, 10))
g = sns.barplot(data=september_installment_freq, x="number_of_installments", y="freq")
g.set(xlabel='Number of Installments', ylabel='Total Installments Count',
      title='Comparison of Installment Counts')
g.set_yscale("log")

<a id="2.3.9"></a> <br>
### **QUESTION 9**
### [*EN*]

### How many customers paid Netflix in September

### [*TR*]
### Eylül ayında Netflix'e kaç müşteri ödeme yaptı


In [None]:
# September rows whose storeName is NETFLIX.COM

dropped_september.loc[dropped_september['storeName'].eq('NETFLIX.COM')]

In [None]:
# Number of distinct customers with at least one Netflix transaction in September.
# `nunique()` on userId replaces the group-count-then-len detour and gives the same number.

print(dropped_september.loc[dropped_september['storeName'] == 'NETFLIX.COM', 'userId'].nunique(),'customers made netflix transaction in September')

In [None]:
# Sum of 'new_amount' over the September rows whose storeName is NETFLIX.COM
print('A total of', dropped_september.loc[dropped_september['storeName'] == 'NETFLIX.COM', 'new_amount'].sum(), 'spent on Netflix in September')

<a id="2.3.10"></a> <br>
### **QUESTION 10**
### [*EN*]

### In September, how many people used UBER and how much total payment was made to the UBER

### [*TR*]
### Eylül ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı

In [None]:
# September rows whose storeName is 'Uber Do Brasil Tecnologia'

dropped_september.loc[dropped_september['storeName'].eq('Uber Do Brasil Tecnologia')]

In [None]:
# Number of distinct customers with at least one UBER transaction in September.
# `september_uber` is kept as a named frame because the next cell sums its amounts.

september_uber = dropped_september[dropped_september['storeName'] == 'Uber Do Brasil Tecnologia']
# nunique() counts distinct userIds directly instead of grouping and taking the length
print(september_uber['userId'].nunique(),'customers made UBER transaction in September')

In [None]:
# Sum of 'new_amount' over the September UBER rows selected in the previous cell

print('A total of', september_uber.loc[:, 'new_amount'].sum(), 'spent on UBER in September')

<a id="2.3.11"></a> <br>
### **QUESTION 11**
### [*EN*]

### In September, End-of-month operating balances for each customer

### [*TR*]
### Eylül ayında her müşterinin ay sonu bakiyesi

In [None]:
# Sum of 'amount' per customer for September, leaving out rows whose operationCode is
# 'TransferenciaEntreContas' (transfer between accounts).
# `.sum()` replaces `.agg(sum)`: handing the Python builtin to `agg` is deprecated in recent pandas
# and emits a FutureWarning, while the result is the same.
september_eom = (
    dropped_september[dropped_september['operationCode'] != 'TransferenciaEntreContas']
    .groupby('userId')[['amount']]
    .sum()
    .sort_index(ascending=True)
)
september_eom.columns = ['eom_balance']
september_eom

<a id="2.4"></a> <br>
## October

In [None]:
# Preview of the first five October rows
october.head(n=5)

In [None]:
# Summary of the October frame (columns, dtypes, non-null counts) as printed by DataFrame.info
october.info()

In [None]:
# Descriptive statistics of the October frame, transposed so each described column becomes a row
october.describe().transpose()

### EN
1. How many transactions happened in October
1. How many customers made transactions in October
1. Which category were the most transactions in October
1. Which category has the highest expenditure amount in October
1. Who is the customer with the highest number of transactions in October
1. Who is the customer with the highest transaction volume in October
1. Credito and Debito comparison in October
1. Comparison of installment transactions in October
1. How many customers paid Netflix in October
1. In October, how many people used UBER and how much total payment was made to the UBER
1. In October, End-of-month operating balances for each customer



---

### TR
1. Ekim ayında kaç işlem olmuş
1. Ekim ayında kaç müşteri işlem yapmış
1. Ekim ayında en fazla hangi kategoride işlem olmuş
1. Ekim ayında hangi kategorinin harcama tutarı en yüksektir
1. Ekim ayında toplamda en fazla işlem adedi olan müşteri kim
1. Ekim ayında toplamda en fazla işlem hacmi olan müşteri kim
1. Ekim ayında Credito ve Debito karşılaştırılması
1. Ekim ayında taksitli işlemlerin karşılaştırılması
1. Ekim ayında Netflix'e kaç müşteri ödeme yaptı
1. Ekim ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı
1. Ekim ayında her müşterinin ay sonu bakiyesi



In [None]:
# Number of distinct values in every column of the October dataset
# (Series.unique keeps NaN, so a missing value counts as one distinct value)

print('====    OCTOBER COLUMNS UNIQUE VALUES    ====\n')
for column_name in october.columns:
    distinct_count = len(october[column_name].unique())
    print(f'Unique elements of {column_name}  -  {distinct_count}')

<a id="2.4.1"></a> <br>
### **QUESTION 1**
### [*EN*]

### How many transactions happened in October

### [*TR*]
### Ekim ayında kaç işlem olmuş

In [None]:
# Show every row whose ('new_amount', 'eventCreateDate', 'userId') combination occurs more than once
october.loc[october.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep=False)]

In [None]:
# Rows that share 'new_amount', 'eventCreateDate' and 'userId' are treated as one transaction that
# was recorded more than once. Only the last occurrence is kept so later counts are not inflated.

drop_index = october.index[october.duplicated(subset=['new_amount', 'eventCreateDate', 'userId'], keep='last')].tolist()
dropped_october = october.drop(index=drop_index).reset_index(drop=True)
dropped_october.shape

In [None]:
# Row count of the de-duplicated October frame
print('There have been', dropped_october.shape[0], 'transactions in October.')

<a id="2.4.2"></a> <br>
### **QUESTION 2**
### [*EN*]

### How many customers made transactions in October

### [*TR*]
### Ekim ayında kaç müşteri işlem yapmış

In [None]:
# Distinct customers in October. `nunique()` counts the distinct userIds directly instead of
# building a grouped count over every column only to take its length.
print(october['userId'].nunique(),'customer made a transaction in October')

<a id="2.4.3"></a> <br>
### **QUESTION 3**
### [*EN*]

### Which category were the most transactions in October

### [*TR*]
### Ekim ayında en fazla hangi kategoride işlem olmuş

In [None]:
# Transactions per category in October (de-duplicated rows), most frequent first
october_categ_freq = (
    dropped_october.groupby('category')['eventCreateDate']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='freq')
)
october_categ_freq

In [None]:
plt.figure(figsize=(20,15))
# The bar order must come from the same de-duplicated frame that is plotted; ordering by the raw
# `october` counts can leave the bars out of descending order once duplicates are removed.
ax = sns.countplot(x="category", data=dropped_october, palette="Blues_d",
                   order=dropped_october['category'].value_counts().index)
plt.xlabel('Categories')
plt.ylabel('Total Transaction')
plt.title('Total Transactions By Category')
plt.xticks(rotation= 90);

<a id="2.4.4"></a> <br>
### **QUESTION 4**
### [*EN*]

### Which category has the highest expenditure amount in October

### [*TR*]
### Ekim ayında hangi kategorinin harcama tutarı en yüksektir

In [None]:
# Spending per category in October: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
october_categ_amount = (
    dropped_october
    .loc[~dropped_october['category'].isin(not_expenses_categ) & dropped_october['operation'].eq('Debito')]
    .groupby('category')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
    .rename(columns={'new_amount': 'total_amount'})
)
october_categ_amount

In [None]:
# Bar chart of the October spending totals, one bar per category
plt.figure(figsize=(20, 15))
g = sns.barplot(data=october_categ_amount, x="category", y="total_amount")
g.set(xlabel='Categories', ylabel='Total Amount', title='Total Amount by Categories')
plt.xticks(rotation=90);

<a id="2.4.5"></a> <br>
### **QUESTION 5**
### [*EN*]

### Who is the customer with the highest number of transactions in October

### [*TR*]
### Ekim ayında toplamda en fazla işlem adedi olan müşteri kim

In [None]:
# Transactions per customer in October (de-duplicated rows), busiest customer first
october_customer_freq = (
    dropped_october.groupby('userId')[['eventCreateDate']]
    .count()
    .sort_values('eventCreateDate', ascending=False)
    .reset_index()
    .rename(columns={'userId': 'customer_id', 'eventCreateDate': 'customer_freq'})
)
october_customer_freq

<a id="2.4.6"></a> <br>
### **QUESTION 6**
### [*EN*]

### Who is the customer with the highest transaction volume in October

### [*TR*]
### Ekim ayında toplamda en fazla işlem hacmi olan müşteri kim

In [None]:
# Spending per customer in October: categories listed in `not_expenses_categ` are excluded and
# only 'Debito' operations are summed.

not_expenses_categ = ['Aplicacao', 'ReceitasNegocio', 'Rendimentos', 'ContratacaoCredito',
                      'OutrosRenda', 'DepositoMesmoCpf', 'TransferenciaMesmoCpf', 'Resgate']
october_customer_amount = (
    dropped_october
    .loc[~dropped_october['category'].isin(not_expenses_categ) & dropped_october['operation'].eq('Debito')]
    .groupby('userId')[['new_amount']]
    .sum()
    .sort_values('new_amount', ascending=False)
    .reset_index()
)
october_customer_amount

In [None]:
# All October rows of customer 71.
# NOTE(review): the id is hard-coded; presumably it was read off the table above - confirm after re-running.
dropped_october.loc[dropped_october['userId'].eq(71)]

<a id="2.4.7"></a> <br>
### **QUESTION 7**
### [*EN*]

### Credito and Debito comparison in October

### [*TR*]
### Ekim ayında Credito ve Debito karşılaştırılması

In [None]:
# Number of rows per operation type in October.
# NOTE(review): this counts the raw `october` frame, so duplicated rows are included - confirm that is intended.
october_credit_debit = (
    october['operation']
    .value_counts()
    .rename_axis('operation')
    .reset_index(name='freq')
)
october_credit_debit

In [None]:
# Count of rows per operation type in October.
# NOTE(review): plotted from the raw `october` frame, so duplicated rows are included - confirm that is intended.
plt.figure(figsize=(12, 9))
ax = sns.countplot(data=october, x="operation", palette="ch:2.5,-.2,dark=.3")
ax.set(xlabel='Operation Type', ylabel='Total Transaction')
plt.title('Comparison of Credito and Debito');

<a id="2.4.8"></a> <br>
### **QUESTION 8**
### [*EN*]

### Comparison of installment transactions in October

### [*TR*]
### Ekim ayında taksitli işlemlerin karşılaştırılması

In [None]:
# Number of October rows for each value of 'installment'
october_installment_freq = (
    october['installment']
    .value_counts()
    .rename_axis('number_of_installments')
    .reset_index(name='freq')
)
october_installment_freq

In [None]:
# Frequency of each installment count in October, drawn on a logarithmic y-axis

plt.figure(figsize=(15, 10))
g = sns.barplot(data=october_installment_freq, x="number_of_installments", y="freq")
g.set(xlabel='Number of Installments', ylabel='Total Installments Count',
      title='Comparison of Installment Counts')
g.set_yscale("log")

<a id="2.4.9"></a> <br>
### **QUESTION 9**
### [*EN*]

### How many customers paid Netflix in October

### [*TR*]
### Ekim ayında Netflix'e kaç müşteri ödeme yaptı


In [None]:
# October rows whose storeName is NETFLIX.COM

dropped_october.loc[dropped_october['storeName'].eq('NETFLIX.COM')]

In [None]:
# Number of distinct customers with at least one Netflix transaction in October.
# `nunique()` on userId replaces the group-count-then-len detour and gives the same number.

print(dropped_october.loc[dropped_october['storeName'] == 'NETFLIX.COM', 'userId'].nunique(),'customers made netflix transaction in October')

In [None]:
# Sum of 'new_amount' over the October rows whose storeName is NETFLIX.COM
print('A total of', dropped_october.loc[dropped_october['storeName'] == 'NETFLIX.COM', 'new_amount'].sum(), 'spent on Netflix in October')

<a id="2.4.10"></a> <br>
### **QUESTION 10**
### [*EN*]

### In October, how many people used UBER and how much total payment was made to the UBER

### [*TR*]
### Ekim ayında kaç kişi UBER kullandı ve UBER'e toplam ne kadar ödeme yapıldı

In [None]:
# October rows whose storeName is 'Uber Do Brasil Tecnologia'

dropped_october.loc[dropped_october['storeName'].eq('Uber Do Brasil Tecnologia')]

In [None]:
# Number of distinct customers with at least one UBER transaction in October.
# `october_uber` is kept as a named frame because the next cell sums its amounts.

october_uber = dropped_october[dropped_october['storeName'] == 'Uber Do Brasil Tecnologia']
# nunique() counts distinct userIds directly instead of grouping and taking the length
print(october_uber['userId'].nunique(),'customers made UBER transaction in October')

In [None]:
# Sum of 'new_amount' over the October UBER rows selected in the previous cell

print('A total of', october_uber.loc[:, 'new_amount'].sum(), 'spent on UBER in October')

<a id="2.4.11"></a> <br>
### **QUESTION 11**
### [*EN*]

### In October, End-of-month operating balances for each customer

### [*TR*]
### Ekim ayında her müşterinin ay sonu bakiyesi

In [None]:
# Sum of 'amount' per customer for October, leaving out rows whose operationCode is
# 'TransferenciaEntreContas' (transfer between accounts).
# `.sum()` replaces `.agg(sum)`: handing the Python builtin to `agg` is deprecated in recent pandas
# and emits a FutureWarning, while the result is the same.
october_eom = (
    dropped_october[dropped_october['operationCode'] != 'TransferenciaEntreContas']
    .groupby('userId')[['amount']]
    .sum()
    .sort_index(ascending=True)
)
october_eom.columns = ['eom_balance']
october_eom

<a id="3"></a> <br>
# Comparison of Data

<font color='green'>
In this section I compare the results found for each month (July through October).
</font>



### EN
1. Comparison of monthly transactions number
1. How many customers made transactions each month
1. Comparison of the monthly number of transactions by category
1. Comparison of monthly transaction amounts by categories
1. Comparison of customers transaction numbers
1. Comparison of the customer with the highest transaction volume
1. Comparison of installment transactions
1. Comparison of Number of customers who made Netflix transactions
1. Comparison of the total amounts spent on Netflix
1. Comparison of Number of customers who made UBER transactions
1. Comparison of the total amounts spent on UBER
1. Comparison of customers' end-of-month balances



---

### TR
1. Aylık işlem sayısının karşılaştırılması
1. Aylık işlem yapan müşteri sayısının karşılaştırılması
1. Kategorilere göre aylık işlemlerin karşılaştırılması
1. Kategorilere göre aylık işlem tutarlarının karşılaştırılması
1. Müşteri işlem sayıları karşılaştırması
1. Müşteri işlem hacimleri karşılaştırılması
1. Taksitli işlemlerin karşılaştırılması
1. Netflix işlemi yapan müşteri sayısının karşılaştırılması
1. Netflix için harcanan toplam tutarların karşılaştırılması
1. UBER işlemi yapan müşteri sayısının karşılaştırılması
1. UBER için harcanan toplam tutarların karşılaştırılması
1. Müşterilerin ay sonu bakiyesinin karşılaştırılması


<a id="3.1"></a> <br>
### **Comparison 1**
### [*EN*]

### Comparison of monthly transactions number

### [*TR*]
### Aylık işlem sayısının karşılaştırılması

In [None]:
# Number of rows in each month's de-duplicated frame, one table row per month
compare_trans = pd.DataFrame(
    {'transactions': [len(dropped_july), len(dropped_august), len(dropped_september), len(dropped_october)]},
    index=['july', 'august', 'september', 'october'],
)

plt.figure(figsize=(12, 9))
sns.barplot(data=compare_trans, x=compare_trans.index, y='transactions');

<a id="3.2"></a> <br>
### **Comparison 2**
### [*EN*]

### How many customers made transactions each month

### [*TR*]
### Aylık işlem yapan müşteri sayısının karşılaştırılması

In [None]:
# Distinct customers per month. `nunique()` counts distinct userIds directly; the earlier version
# built a grouped count over every column of each month and used only its length.
compare_customer_trans = pd.DataFrame(
    {'customer_trans': [month_frame['userId'].nunique()
                        for month_frame in (july, august, september, october)]},
    index=['july', 'august', 'september', 'october'],
)

plt.figure(figsize=(12,9))
sns.barplot(x=compare_customer_trans.index, y='customer_trans',data=compare_customer_trans,color="salmon");

<a id="3.3"></a> <br>
### **Comparison 3**
### [*EN*]

### Compare the category number of transactions

### [*TR*]
### Kategorilere göre aylık işlemlerin karşılaştırılması

In [None]:
def _freq_indexed_by_category(freq_table):
    """Return `freq_table` indexed by 'category'.

    A table that no longer has a 'category' column (this cell already ran) is returned unchanged,
    so re-running the cell does not raise KeyError.
    """
    if 'category' in freq_table.columns:
        return freq_table.set_index('category')
    return freq_table


july_categ_freq = _freq_indexed_by_category(july_categ_freq)
august_categ_freq = _freq_indexed_by_category(august_categ_freq)
september_categ_freq = _freq_indexed_by_category(september_categ_freq)
october_categ_freq = _freq_indexed_by_category(october_categ_freq)

In [None]:
# Side-by-side table of the four monthly category frequency tables, aligned on their index
compare_categ_freq = pd.concat(
    objs=[july_categ_freq, august_categ_freq, september_categ_freq, october_categ_freq],
    axis='columns',
)
compare_categ_freq.columns = ['july', 'august', 'september', 'october']
compare_categ_freq

In [None]:
# Grouped bars for the first ten rows of the comparison table.
# Trailing semicolon keeps the Axes repr out of the cell output, as in the sibling plot cells.
compare_categ_freq.iloc[:10,:].plot(kind='bar',figsize=(15,12));

<a id="3.4"></a> <br>
### **Comparison 4**
### [*EN*]

### Comparison of monthly transaction amounts by categories

### [*TR*]
### Kategorilere göre aylık işlem tutarlarının karşılaştırılması

In [None]:
def _amount_indexed_by_category(amount_table):
    """Return `amount_table` indexed by 'category'.

    A table that no longer has a 'category' column (this cell already ran) is returned unchanged,
    so re-running the cell does not raise KeyError.
    """
    if 'category' in amount_table.columns:
        return amount_table.set_index('category')
    return amount_table


july_categ_amount = _amount_indexed_by_category(july_categ_amount)
august_categ_amount = _amount_indexed_by_category(august_categ_amount)
september_categ_amount = _amount_indexed_by_category(september_categ_amount)
october_categ_amount = _amount_indexed_by_category(october_categ_amount)

In [None]:
# Side-by-side table of the four monthly category amount tables, aligned on their index
compare_categ_amount = pd.concat(
    objs=[july_categ_amount, august_categ_amount, september_categ_amount, october_categ_amount],
    axis='columns',
)
compare_categ_amount.columns = ['july', 'august', 'september', 'october']
compare_categ_amount

In [None]:
# Grouped bars for the first ten rows of the category amount comparison
compare_categ_amount.head(10).plot.bar(figsize=(15, 12));

<a id="3.5"></a> <br>
### **Comparison 5**
### [*EN*]

### Comparison of customers transaction numbers

### [*TR*]
### Müşteri işlem sayıları karşılaştırması

In [None]:
# Per-customer transaction counts of the four months side by side, aligned on customer_id
compare_customer_freq = pd.concat(
    [monthly_freq.set_index('customer_id')
     for monthly_freq in (july_customer_freq, august_customer_freq,
                          september_customer_freq, october_customer_freq)],
    axis='columns',
)
compare_customer_freq.columns = ['july', 'august', 'september', 'october']
compare_customer_freq

In [None]:
# Grouped bars for the twenty smallest customer ids in the frequency comparison
compare_customer_freq.sort_index().head(20).plot.bar(figsize=(15, 12));

<a id="3.6"></a> <br>
### **Comparison 6**
### [*EN*]

### Comparison of the customer with the highest transaction volume

### [*TR*]
### Müşteri işlem hacimleri karşılaştırılması

In [None]:
# Per-customer spending totals of the four months side by side, aligned on userId
compare_customer_amount = pd.concat(
    [monthly_amount.set_index('userId')
     for monthly_amount in (july_customer_amount, august_customer_amount,
                            september_customer_amount, october_customer_amount)],
    axis='columns',
)
compare_customer_amount.columns = ['july', 'august', 'september', 'october']
compare_customer_amount

In [None]:
# Grouped bars for the twenty smallest customer ids in the amount comparison
compare_customer_amount.sort_index().head(20).plot.bar(figsize=(15, 12));

<a id="3.7"></a> <br>
### **Comparison 7**
### [*EN*]

### Comparison of installment transactions

### [*TR*]
### Taksitli işlemlerin karşılaştırılması

In [None]:
# Installment frequency tables of the four months side by side, aligned on number_of_installments
compare_installment_freq = pd.concat(
    [monthly_installments.set_index('number_of_installments')
     for monthly_installments in (july_installment_freq, august_installment_freq,
                                  september_installment_freq, october_installment_freq)],
    axis='columns',
)

compare_installment_freq.columns = ['july', 'august', 'september', 'october']
compare_installment_freq

In [None]:
# Plot the natural log of the values so months/installment counts of very
# different magnitude fit on one chart (note: a value of 1 maps to a bar of height 0).
np.log(compare_installment_freq).plot.bar(figsize=(15,12));

<a id="3.8"></a> <br>
### **Comparison 8**
### [*EN*]

### Comparison of the number of customers who made Netflix transactions

### [*TR*]
### Netflix işlemi yapan müşteri sayısının karşılaştırılması

In [None]:
# For each month, count the userId groups that have at least one row whose
# storeName is 'NETFLIX.COM'.
compare_netflix_count = pd.DataFrame(
    [len(monthly[monthly['storeName'] == 'NETFLIX.COM'].groupby('userId')['eventCreateDate'].count())
     for monthly in (dropped_july, dropped_august, dropped_september, dropped_october)],
    columns=['freq'],
    index=['july','august','september','october'],
)

# One bar per month.
plt.figure(figsize=(12,9))
sns.barplot(x=compare_netflix_count.index, y='freq', data=compare_netflix_count)
plt.ylabel('Number of Customers Transactions');

<a id="3.9"></a> <br>
### **Comparison 9**
### [*EN*]

### Comparison of the total amounts spent on Netflix 

### [*TR*]
### Netflix için harcanan toplam tutarların karşılaştırılması

In [None]:
# For each month, total the new_amount column over rows whose storeName is 'NETFLIX.COM'.
# The single column keeps the name 'freq' even though it holds summed amounts.
compare_netflix_amount = pd.DataFrame(
    [monthly[monthly['storeName'] == 'NETFLIX.COM']['new_amount'].sum()
     for monthly in (dropped_july, dropped_august, dropped_september, dropped_october)],
    columns=['freq'],
    index=['july','august','september','october'],
)

# One bar per month.
plt.figure(figsize=(12,9))
sns.barplot(x=compare_netflix_amount.index, y='freq', data=compare_netflix_amount)
plt.ylabel('Total Amount');

<a id="3.10"></a> <br>
### **Comparison 10**
### [*EN*]

### Comparison of the number of customers who made UBER transactions

### [*TR*]
### UBER işlemi yapan müşteri sayısının karşılaştırılması

In [None]:
# For each month, count the userId groups that have at least one row whose
# storeName is 'Uber Do Brasil Tecnologia'.
compare_uber_count = pd.DataFrame(
    [len(monthly[monthly['storeName'] == 'Uber Do Brasil Tecnologia'].groupby('userId')['eventCreateDate'].count())
     for monthly in (dropped_july, dropped_august, dropped_september, dropped_october)],
    columns=['freq'],
    index=['july','august','september','october'],
)

# One bar per month.
plt.figure(figsize=(12,9))
sns.barplot(x=compare_uber_count.index, y='freq', data=compare_uber_count)
plt.ylabel('Number of Customers Transactions');

<a id="3.11"></a> <br>
### **Comparison 11**
### [*EN*]

### Comparison of the total amounts spent on UBER 

### [*TR*]
### UBER için harcanan toplam tutarların karşılaştırılması

In [None]:
# For each month, total the new_amount column over rows whose storeName is
# 'Uber Do Brasil Tecnologia'. The single column keeps the name 'freq' even
# though it holds summed amounts.
compare_uber_amount = pd.DataFrame(
    [monthly[monthly['storeName'] == 'Uber Do Brasil Tecnologia']['new_amount'].sum()
     for monthly in (dropped_july, dropped_august, dropped_september, dropped_october)],
    columns=['freq'],
    index=['july','august','september','october'],
)

# One bar per month.
plt.figure(figsize=(12,9))
sns.barplot(x=compare_uber_amount.index, y='freq', data=compare_uber_amount)
plt.ylabel('Total Amount');

<a id="3.12"></a> <br>
### **Comparison 12**
### [*EN*]

### Comparison of customers' end-of-month balances

### [*TR*]
### Müşterilerin ay sonu bakiyesinin karşılaştırılması

In [None]:
# Place the four monthly end-of-month objects side by side, aligned on their index.
compare_eom = pd.concat(
    [july_eom, august_eom, september_eom, october_eom],
    axis=1,
)
# Label each column '<month>_eom', in concatenation order.
compare_eom.columns = [month + '_eom' for month in ('july', 'august', 'september', 'october')]
compare_eom

In [None]:
# Number of missing values per month column (index entries absent from that month's data).
compare_eom.isnull().sum()

In [None]:
# Bar chart of the first 20 index entries (after sorting) that have a value in
# all four months. The trailing semicolon suppresses the Axes repr, matching
# the other plotting cells in this notebook.
compare_eom.dropna().sort_index().iloc[:20,:].plot(kind='bar',figsize=(15,12));

<a id="4"></a> <br>

# Data Analysis


* **Week 1** = *Analysis of 2 categories of expenses (Uber /Netflix) in october and the relationship to end-of-month running balance*

* **Week 2** = 

<a id="4.1"></a> <br>

## Week 1

In [None]:
# Use the transaction timestamp as the index so rows can be filtered by month.
# The guard makes this cell safe to re-run: once 'eventCreateDate' has become the
# index it is no longer a column, and an unguarded set_index would raise KeyError.
# NOTE(review): `.index.month` below assumes eventCreateDate is already a datetime dtype - confirm.
if 'eventCreateDate' in october.columns:
    october.set_index('eventCreateDate', inplace=True)

# Keep only rows dated in October (month 10); the author notes the October
# dataset also contains November rows.
real_october = october[october.index.month == 10]

# Netflix transactions in October, in chronological order.
october_netflix = real_october[real_october['storeName'] == 'NETFLIX.COM'].sort_index()

# UBER transactions in October, in chronological order.
october_uber = real_october[real_october['storeName'] == 'Uber Do Brasil Tecnologia'].sort_index()

In [None]:
# Display the October frame (indexed by eventCreateDate by the preceding cell).
october

<a id="4.1.1"></a> <br>
### Netflix

In [None]:
# Preview the first five October Netflix transactions.
october_netflix.head(5)

<a id="4.1.1.1"></a> <br>

#### **QUESTION 1**
### [*EN*]

### How many customers paid Netflix in October

### [*TR*]
### Ekim ayında Netflix'e kaç müşteri ödeme yaptı


In [None]:
# Number of distinct customers (userId) with at least one Netflix transaction in October.
# The message no longer embeds a hard-coded, spelled-out count: the computed value
# is printed directly, so the text cannot go stale when the data changes.
print(october_netflix['userId'].nunique(), 'customers made netflix transaction in October')

<a id="4.1.1.2"></a> <br>

#### **QUESTION 2**
### [*EN*]

### How much was paid on Netflix in October?

### [*TR*]
### Ekim ayında Netflix'te ne kadar ödendi?

In [None]:
# Total of new_amount over all October Netflix transactions.
# The message no longer embeds a hard-coded, spelled-out amount: the computed sum
# is printed directly, so the text cannot go stale when the data changes.
print('A total of', october_netflix['new_amount'].sum(), 'R$(Brazilian Real) spent on Netflix in October')

<a id="4.1.1.3"></a> <br>

#### **QUESTION 3**
### [*EN*]

### Comparison of payment channels made to Netflix in October

### [*TR*]
### Ekim ayında Netflix'e yapılan ödeme kanalları karşılaştırılması

In [None]:
# Count October Netflix payments per account type (payment channel).
# Author's observation on the original data: 1038 (one thousand thirty-eight) payments
# were made by credit card and 155 (one hundred fifty-five) by ContaCorrente.

october_netflix['accountType'].value_counts()

In [None]:
# Bar chart of October Netflix payment counts per account type.
october_netflix['accountType'].value_counts().plot.bar(figsize=(10,9));

<a id="4.1.1.4"></a> <br>

#### **QUESTION 4**
### [*EN*]

### Customers paying multiple times to Netflix in October

### [*TR*]
### Ekim ayında Netflix'e birden fazla ödeme yapan müşteriler

In [None]:
# Customers who made more than one payment to Netflix in October.

# Count each user's non-null 'operation' entries, most frequent first.
october_netflix_mlt = october_netflix.groupby('userId')[['operation']].count().sort_values('operation',ascending=False)
# Keep strictly more than one payment. `> 1` (rather than `!= 1`) also excludes
# users whose count is 0 because every 'operation' value is null.
october_netflix_mlt = october_netflix_mlt[october_netflix_mlt['operation'] > 1]
october_netflix_mlt.columns = ['netflix_freq_october']
october_netflix_mlt

In [None]:
# How many customers made 2, 3, ... Netflix payments in October.
october_netflix_mlt['netflix_freq_october'].value_counts().plot.bar(figsize=(10,9));

<a id="4.1.1.5"></a> <br>

#### **QUESTION 5**
### [*EN*]

### Comparing customers who made 2 or more payments to Netflix by months

### [*TR*]
### Netflix'e 2 veya daha fazla ödeme yapan müşterileri aylara göre karşılaştırması

In [None]:
def _monthly_netflix_repeat_payers(month_df, month_number, month_name):
    """Build the per-month Netflix tables used for the repeat-payer comparison.

    Parameters
    ----------
    month_df : DataFrame
        One month's transactions. It is re-indexed by 'eventCreateDate' in place
        (only if that is still a column, so re-running the cell does not raise).
    month_number : int
        Calendar month to keep (e.g. 7 for July); rows dated in other months are dropped.
    month_name : str
        Suffix for the result column, e.g. 'july' -> 'netflix_freq_july'.

    Returns
    -------
    (real_month, month_netflix, month_netflix_mlt)
        Rows of the requested month; the NETFLIX.COM rows among them in
        chronological order; and, per userId, the number of Netflix payments
        for users with more than one payment.
    """
    # Guarded: after the first run 'eventCreateDate' is the index, not a column.
    # NOTE(review): `.index.month` assumes eventCreateDate is a datetime dtype - confirm.
    if 'eventCreateDate' in month_df.columns:
        month_df.set_index('eventCreateDate', inplace=True)

    # Drop rows whose timestamp falls outside the requested month.
    real_month = month_df[month_df.index.month == month_number]

    month_netflix = real_month[real_month['storeName'] == 'NETFLIX.COM'].sort_index()

    payments = (month_netflix.groupby('userId')[['operation']]
                .count()
                .sort_values('operation', ascending=False))
    # `> 1` rather than `!= 1`, so users with a count of 0 (all-null 'operation') are excluded too.
    month_netflix_mlt = (payments[payments['operation'] > 1]
                         .rename(columns={'operation': 'netflix_freq_' + month_name}))
    return real_month, month_netflix, month_netflix_mlt


# July
real_july, july_netflix, july_netflix_mlt = _monthly_netflix_repeat_payers(july, 7, 'july')

# August
real_august, august_netflix, august_netflix_mlt = _monthly_netflix_repeat_payers(august, 8, 'august')

# September
real_september, september_netflix, september_netflix_mlt = _monthly_netflix_repeat_payers(september, 9, 'september')


# One row per userId that was a repeat payer in at least one month; one column per month.
mlt = pd.concat([july_netflix_mlt,
           august_netflix_mlt,
           september_netflix_mlt,
           october_netflix_mlt],axis=1)

# Months in which a user was not a repeat payer are marked with the string 'nan'.
mlt.replace(np.nan,'nan',inplace=True)

In [None]:
# Collect the userIds that were repeat Netflix payers in at least three of the four months.
# In `mlt`, a month without repeat payments is marked with the string 'nan';
# real NaN is treated as missing as well, so the result is the same either way.
month_columns = ['netflix_freq_july', 'netflix_freq_august',
                 'netflix_freq_september', 'netflix_freq_october']
is_repeat_payer = mlt[month_columns].notna() & (mlt[month_columns] != 'nan')

# Vectorised replacement for the row-by-row iloc loop: count flagged months per user.
index = mlt.index[is_repeat_payer.sum(axis=1) >= 3].tolist()

In [None]:
# Per-month Netflix payment counts for the users collected in `index`.
# Some of these customers paid Netflix 2 - 3 times in a month.
mlt.loc[mlt.index.isin(index)]

In [None]:
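# We inspect this customer's transactions because customer number 79 made more than one payment to Netflix in 3 of the months.
# NOTE(review): the cells below filter on userId 663, not 79 - confirm which customer is meant.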


# !== NETFLIX PRICES 2019 (two thousand nineteen) ==! 

# Plano Básico - R$ 21,90 (twenty-one point nine)
# Plano Padrão - R$ 32,90 (thirty-two point nine)
# Plano Premium - R$ 45,90 (forty-five point nine)

In [None]:
# Frequency of each distinct Netflix payment amount in the full dataset,
# for comparison with the plan prices listed above.
df.loc[df['storeName'] == 'NETFLIX.COM', 'amount'].value_counts()

In [None]:
# July Netflix transactions of userId 663.
july[july['userId'].eq(663) & july['storeName'].eq('NETFLIX.COM')]

In [None]:
# August Netflix transactions of userId 663.
august[august['userId'].eq(663) & august['storeName'].eq('NETFLIX.COM')]

In [None]:
# September Netflix transactions of userId 663.
september[september['userId'].eq(663) & september['storeName'].eq('NETFLIX.COM')]

In [None]:
# October Netflix transactions of userId 663.
october[october['userId'].eq(663) & october['storeName'].eq('NETFLIX.COM')]

In [None]:
# All July transactions of userId 663 (any store).
july[july['userId'].eq(663)]