# [CSI-30] Atividade 4  - Predictive analytics

##### Importação de Bibliotecas

In [1]:
import csv
import math
import os
import re
import time
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymssql
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [2]:
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation/future warnings raised by later cells.
warnings.filterwarnings('ignore')

In [3]:
# pd.set_option('display.max_rows', None)

## Criação do SGBD (MSSQL Server 2017)

In [4]:
# Start the services described in ../docker-compose.yml in detached mode (-d).
!docker-compose -f ../docker-compose.yml up -d

/bin/bash: /home/lucas/anaconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
[+] Running 1/0
 ✔ Network csi-30-at4_default        Creat...                              0.1s 
 ⠋ Container csi-30-at4-sqlserver-1  Creating                              0.0s 
[+] Running 1/2
 ✔ Network csi-30-at4_default        Creat...                              0.1s 
 ⠙ Container csi-30-at4-sqlserver-1  Creating                              0.1s 
[+] Running 2/2
 ✔ Network csi-30-at4_default        Creat...                              0.1s 
 ✔ Container csi-30-at4-sqlserver-1  Created                               0.1s 
[+] Running 2/2
 ✔ Network csi-30-at4_default        Creat...

## Conexão com o MSSQL Server 2017

#### Aguarde o container inicializar!

In [5]:
# Fixed 10-second pause so the database container can start before connecting.
# NOTE(review): a fixed delay does not guarantee the server is accepting connections yet.
time.sleep(10)

In [6]:
# Connection settings can be overridden through environment variables; the
# fallbacks are the values this notebook has always used, so it still runs
# unchanged on a local setup.
# NOTE(review): the fallback password is still committed with the notebook —
# presumably it mirrors ../docker-compose.yml; move it out of source control.
MSSQL_HOST = os.environ.get('MSSQL_HOST', 'localhost')
MSSQL_USER = os.environ.get('MSSQL_USER', 'sa')
MSSQL_PASSWORD = os.environ.get('MSSQL_SA_PASSWORD', 'Pent@gon1979')

# The server may still be starting when this cell runs, so retry for a bounded
# time instead of failing on the first refused connection.
MSSQL_CONNECT_ATTEMPTS = 12
MSSQL_RETRY_SECONDS = 5

for attempt in range(1, MSSQL_CONNECT_ATTEMPTS + 1):
    try:
        conn = pymssql.connect(host=MSSQL_HOST, user=MSSQL_USER, password=MSSQL_PASSWORD, database='master')
        break
    except pymssql.Error:
        # Out of attempts: surface the driver error instead of hiding it.
        if attempt == MSSQL_CONNECT_ATTEMPTS:
            raise
        time.sleep(MSSQL_RETRY_SECONDS)

In [7]:
# Run every statement in autocommit mode instead of inside an explicit transaction.
# NOTE(review): presumably needed for the CREATE DATABASE statement issued below — confirm.
conn.autocommit(True)

In [8]:
# Single cursor, passed to every load function below.
cursor = conn.cursor()

### Modelo Dimensional

![schema_dim](../images/schema_dim.png)

### Criação das tabelas e relacionamentos do banco de dados dimensional

In [9]:
# Create the data-warehouse database.
# NOTE(review): not idempotent — presumably fails when FI_DW already exists
# (e.g. when the notebook is re-run against the same container).
cursor.execute("CREATE DATABASE FI_DW;")

In [10]:
# Read the whole dimensional-model DDL script into memory as a single string.
with open('../sql/ddl_dimensional.sql', 'r') as file:
    ddl_create_dimensional = file.read()

In [11]:
# Split the script into batches on the keyword GO. Matching GO as a whole word
# (\b...\b) leaves identifiers and literals that merely contain the letters
# "GO" intact; a plain str.split('GO') would cut them in half.
# Empty batches are dropped.
commands = [command.strip() for command in re.split(r'\bGO\b', ddl_create_dimensional) if command.strip()]

In [12]:
# Execute the DDL batches in order. The try wraps the whole loop, so the first
# failing batch is printed and the remaining batches are not executed; the
# notebook itself keeps running.
try:
    for command in commands:
        cursor.execute(command)
    
except Exception as e:
    print(f"Erro: {str(e)}")

## Extraction

##### Informações Diárias

In [13]:
# Year-month (YYYYMM) suffixes of the daily-report CSV files to load,
# from 2021-11 through 2023-10 (24 files).
path_files_date = [
                                                                                    202111, 202112,
    202201, 202202, 202203, 202204, 202205, 202206, 202207, 202208, 202209, 202210, 202211, 202212,
    202301, 202302, 202303, 202304, 202305, 202306, 202307, 202308, 202309, 202310  
]

In [14]:
# Empty accumulator; the monthly daily-report files are appended to it below.
inf_diario = pd.DataFrame()

In [15]:
# Read every monthly file first and concatenate once: calling pd.concat inside
# the loop re-copies all previously loaded rows on each iteration.
monthly_frames = []
for date in path_files_date:
    csv_path = f"../data/inf_diario/inf_diario_fi_{date}.csv"
    monthly_frames.append(pd.read_csv(csv_path, sep=';'))

# The current inf_diario stays first, so the row order matches what
# month-by-month appending produced.
inf_diario = pd.concat([inf_diario, *monthly_frames], axis=0)

##### Informações cadastrais

In [16]:
# Fund registration data: one semicolon-separated CSV decoded as ISO-8859-1.
# low_memory=False asks pandas to parse the file in one pass rather than in
# chunks, which avoids mixed-dtype inference within a column.
cad = pd.read_csv('../data/cad/cad_fi.csv', sep=';', encoding='iso-8859-1', low_memory=False)

## Transformation and Load

#### Dimension Controller

![dim_Controller](../images/dim_Controller.png)

In [17]:
def create_dim_Controller(conn, cursor, cad):
    """Insert one dim_Controller row per distinct CNPJ_CONTROLADOR found in cad.

    Args:
        conn: Open DB connection; commit() is called after each successful
            insert and rollback() after each failed one.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame with the columns CNPJ_CONTROLADOR and
            CONTROLADOR.

    Rows whose CNPJ_CONTROLADOR is missing are skipped. When a CNPJ appears
    more than once, the name from its first row is used. A missing name is
    sent as None (SQL NULL) instead of a float NaN. Errors are printed and do
    not propagate, and a failed insert does not stop the remaining ones.
    """
    command = ("INSERT INTO dim_Controller (NameController, CNPJController) VALUES (%s, %s)")

    # Deduplicate once instead of rescanning the whole frame for every CNPJ.
    controllers = (
        cad[['CONTROLADOR', 'CNPJ_CONTROLADOR']]
        .dropna(subset=['CNPJ_CONTROLADOR'])
        .drop_duplicates(subset='CNPJ_CONTROLADOR')
    )

    for controller, cnpj in controllers.itertuples(index=False, name=None):
        if pd.isna(controller):
            controller = None

        try:
            cursor.execute(command, (controller, cnpj))
            conn.commit()
        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [18]:
# Populate dim_Controller from the registration data.
create_dim_Controller(conn, cursor, cad)

#### Dimension Auditor

![dim_Auditor](../images/dim_Auditor.png)

In [19]:
def create_dim_Auditor(conn, cursor, cad):
    """Insert one dim_Auditor row per distinct CNPJ_AUDITOR found in cad.

    Args:
        conn: Open DB connection; commit() is called after each successful
            insert and rollback() after each failed one.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame with the columns CNPJ_AUDITOR and AUDITOR.

    Rows whose CNPJ_AUDITOR is missing are skipped. When a CNPJ appears more
    than once, the name from its first row is used. A missing name is sent as
    None (SQL NULL) instead of a float NaN. Errors are printed and do not
    propagate, and a failed insert does not stop the remaining ones.
    """
    command = ("INSERT INTO dim_Auditor (NameAuditor, CNPJAuditor) VALUES (%s, %s)")

    # Deduplicate once instead of rescanning the whole frame for every CNPJ.
    auditors = (
        cad[['AUDITOR', 'CNPJ_AUDITOR']]
        .dropna(subset=['CNPJ_AUDITOR'])
        .drop_duplicates(subset='CNPJ_AUDITOR')
    )

    for auditor, cnpj in auditors.itertuples(index=False, name=None):
        if pd.isna(auditor):
            auditor = None

        try:
            cursor.execute(command, (auditor, cnpj))
            conn.commit()
        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [20]:
# Populate dim_Auditor from the registration data.
create_dim_Auditor(conn, cursor, cad)

#### Dimension Custodian

![dim_Custodian](../images/dim_Custodian.png)

In [21]:
def create_dim_Custodian(conn, cursor, cad):
    """Insert one dim_Custodian row per distinct CNPJ_CUSTODIANTE found in cad.

    Args:
        conn: Open DB connection; commit() is called after each successful
            insert and rollback() after each failed one.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame with the columns CNPJ_CUSTODIANTE and
            CUSTODIANTE.

    Rows whose CNPJ_CUSTODIANTE is missing are skipped. When a CNPJ appears
    more than once, the name from its first row is used. A missing name is
    sent as None (SQL NULL) instead of a float NaN. Errors are printed and do
    not propagate, and a failed insert does not stop the remaining ones.
    """
    command = ("INSERT INTO dim_Custodian (NameCustodian, CNPJCustodian) VALUES (%s, %s)")

    # Deduplicate once instead of rescanning the whole frame for every CNPJ.
    custodians = (
        cad[['CUSTODIANTE', 'CNPJ_CUSTODIANTE']]
        .dropna(subset=['CNPJ_CUSTODIANTE'])
        .drop_duplicates(subset='CNPJ_CUSTODIANTE')
    )

    for custodian, cnpj in custodians.itertuples(index=False, name=None):
        if pd.isna(custodian):
            custodian = None

        try:
            cursor.execute(command, (custodian, cnpj))
            conn.commit()
        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [22]:
# Populate dim_Custodian from the registration data.
create_dim_Custodian(conn, cursor, cad)

#### Dimension Admin

![dim_Admin](../images/dim_Admin.png)

In [23]:
def create_dim_Admin(conn, cursor, cad):
    """Insert one dim_Admin row per distinct CNPJ_ADMIN found in cad.

    Args:
        conn: Open DB connection; commit() is called after each successful
            insert and rollback() after each failed one.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame with the columns CNPJ_ADMIN and ADMIN.

    Rows whose CNPJ_ADMIN is missing are skipped. When a CNPJ appears more
    than once, the name from its first row is used. A missing name is sent as
    None (SQL NULL) instead of a float NaN. Errors are printed and do not
    propagate, and a failed insert does not stop the remaining ones.
    """
    command = ("INSERT INTO dim_Admin (NameAdmin, CNPJAdmin) VALUES (%s, %s)")

    # Deduplicate once instead of rescanning the whole frame for every CNPJ.
    admins = (
        cad[['ADMIN', 'CNPJ_ADMIN']]
        .dropna(subset=['CNPJ_ADMIN'])
        .drop_duplicates(subset='CNPJ_ADMIN')
    )

    for admin, cnpj in admins.itertuples(index=False, name=None):
        if pd.isna(admin):
            admin = None

        try:
            cursor.execute(command, (admin, cnpj))
            conn.commit()
        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [24]:
# Populate dim_Admin from the registration data.
create_dim_Admin(conn, cursor, cad)

#### Dimension Manager

![dim_Manager](../images/dim_Manager.png)

In [25]:
def create_dim_Manager(conn, cursor, cad):
    """Insert one dim_Manager row per distinct CPF_CNPJ_GESTOR found in cad.

    Args:
        conn: Open DB connection; commit() is called after each successful
            insert and rollback() after each failed one.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame with the columns CPF_CNPJ_GESTOR, GESTOR
            and PF_PJ_GESTOR.

    Rows whose CPF_CNPJ_GESTOR is missing are skipped. When an identifier
    appears more than once, the values from its first row are used. Missing
    GESTOR / PF_PJ_GESTOR values are sent as None (SQL NULL) instead of a
    float NaN. Errors are printed and do not propagate, and a failed insert
    does not stop the remaining ones.
    """
    # NOTE(review): "NameJManager" looks like a typo for "NameManager" — confirm
    # against the column name declared in ../sql/ddl_dimensional.sql.
    command = ("INSERT INTO dim_Manager (CPF_CNPJManager, NameJManager, PF_PJ_Manager) VALUES (%s, %s, %s)")

    # Deduplicate once instead of rescanning the whole frame for every manager.
    managers = (
        cad[['CPF_CNPJ_GESTOR', 'GESTOR', 'PF_PJ_GESTOR']]
        .dropna(subset=['CPF_CNPJ_GESTOR'])
        .drop_duplicates(subset='CPF_CNPJ_GESTOR')
    )

    for CpfCnpjManager, manager, PfPjGestor in managers.itertuples(index=False, name=None):
        if pd.isna(manager):
            manager = None
        if pd.isna(PfPjGestor):
            PfPjGestor = None

        try:
            cursor.execute(command, (CpfCnpjManager, manager, PfPjGestor))
            conn.commit()
        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [None]:
# Populate dim_Manager from the registration data.
create_dim_Manager(conn, cursor, cad)

#### Dimension Investment Fund

![dim_Investment_Fund](../images/dim_Investment_Fund.png)

In [None]:
def tratar_nan(valor):
    """Map missing-value markers to None and return anything else unchanged.

    A value counts as missing when it equals the string 'nan' or when it is a
    float/int for which math.isnan() is true.
    """
    if valor == 'nan':
        return None

    if isinstance(valor, (float, int)) and math.isnan(valor):
        return None

    return valor

In [None]:
def create_dim_Investment_Fund(conn, cursor, cad):
    """Insert every registration row of cad into dim_Investment_Fund.

    Args:
        conn: Open DB connection; commit() is called after each inserted row
            and rollback() when a fund's rows fail.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame containing CNPJ_FUNDO and the source
            columns listed in ``source_columns`` below.

    Rows are processed fund by fund, in order of each CNPJ_FUNDO's first
    appearance; rows with a missing CNPJ_FUNDO are skipped. Missing values are
    converted to None through tratar_nan, and the 'S' flag of FUNDO_COTAS /
    FUNDO_EXCLUSIVO becomes 1 (any other non-missing value becomes 0).

    Errors are printed and do not propagate: a failure skips the remaining
    rows of that fund only, then loading continues with the next fund.
    """
    # Source columns, in the same order as the INSERT column list below.
    source_columns = ['DENOM_SOCIAL', 'CNPJ_FUNDO', 'CLASSE_ANBIMA', 'DT_REG', 'DT_CONST',
                      'CD_CVM', 'SIT', 'TAXA_PERFM', 'TAXA_ADM', 'DIRETOR', 'FUNDO_COTAS',
                      'FUNDO_EXCLUSIVO', 'CLASSE', 'CONDOM', 'RENTAB_FUNDO', 'TP_FUNDO',
                      'PUBLICO_ALVO', 'VL_PATRIM_LIQ']

    # Positions of the two 'S'-flag columns, which are stored as 1/0.
    flag_positions = (source_columns.index('FUNDO_COTAS'), source_columns.index('FUNDO_EXCLUSIVO'))

    command = ("""
        INSERT INTO dim_Investment_Fund (
            DenomSocial, 
            CNPJ, 
            ClassAMBIMA, 
            DataReg, 
            DataConst, 
            CodCVM, 
            Situation, 
            TaxPerfomance, 
            TaxAdm, 
            Director, 
            FundCotas, 
            FundExclusive, 
            Classe, 
            Condom, 
            RentabFund, 
            TypeFund, 
            TargetAudience,
            NetWorth)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    )

    # groupby(sort=False) visits the funds in first-appearance order and drops
    # missing keys, replacing one full scan of cad per distinct CNPJ.
    for _, fund_rows in cad.groupby('CNPJ_FUNDO', sort=False):
        try:
            # itertuples yields the values by position without relying on
            # integer indexing into a label-indexed row Series.
            for record in fund_rows[source_columns].itertuples(index=False, name=None):
                values = [tratar_nan(value) for value in record]

                for position in flag_positions:
                    if values[position] is not None:
                        values[position] = 1 if values[position] == 'S' else 0

                cursor.execute(command, tuple(values))
                conn.commit()

        except Exception as e:
            print(f"Erro: {str(e)}")
            conn.rollback()

In [None]:
# Populate dim_Investment_Fund from the registration data.
create_dim_Investment_Fund(conn, cursor, cad)

#### Dimension Time

![dim_Time](../images/dim_Time.png)

In [None]:
def create_dim_Time(conn, cursor, datetime, day, week, month, quarter, year):
    """Insert a single row into dim_Time and commit it.

    Args:
        conn: Open DB connection; committed on success, rolled back on error.
        cursor: Cursor on conn used to execute the INSERT.
        datetime: Value for the datetime column (inside this function the
            parameter shadows the imported datetime class).
        day, week, month, quarter, year: Values for the matching columns.

    Any exception is printed and swallowed after the rollback.
    """
    row_values = (datetime, day, week, month, quarter, year)

    try:
        command = ("INSERT INTO dim_Time ( \
                    datetime, day, week, month, quarter, year) \
                    VALUES (%s, %s, %s, %s, %s, %s)")
        cursor.execute(command, row_values)
        conn.commit()
    except Exception as error:
        print(f"Erro: {str(error)}")
        conn.rollback()

In [None]:
# Collects the distinct DT_COMPTC values found in the daily reports.
auxTime = set()

In [None]:
# Gather the distinct report dates with a vectorised unique() instead of
# materialising every row of inf_diario through iterrows().
auxTime.update(inf_diario['DT_COMPTC'].unique())

In [None]:
# Distinct report dates in ascending order (sorted() accepts the set directly
# and returns a new list).
auxTimeList = sorted(auxTime)

In [None]:
# Insert one dim_Time row per distinct report date, in ascending date order.
for date_string in auxTimeList:

    dt = datetime.strptime(date_string, '%Y-%m-%d')

    day = dt.day
    # '%U' is the zero-padded week of the year (weeks start on Sunday), kept as a string.
    week = dt.strftime('%U')
    # Calendar quarter: months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    # (Dividing the month by 4 produced only three "quarters" and put April in Q1.)
    quarter = (dt.month - 1) // 3 + 1
    year = dt.year
    month = dt.month

    create_dim_Time(
        conn,
        cursor,
        date_string,
        day,
        week,
        month,
        quarter,
        year
    )

In [None]:
# Drop the temporary date collections; they are not needed after dim_Time is loaded.
del auxTime
del auxTimeList

#### Fact Value

![fact_Value](../images/fact_Value.png)

In [None]:
# Surrogate-key lookup for dim_Time, indexed by the datetime column.
surrogate_dim_Time = (
    pd.read_sql("SELECT id_dim_time, datetime FROM dim_Time", conn)
    .set_index('datetime')
)

In [None]:
# Surrogate-key lookup for dim_Investment_Fund, indexed by CNPJ; Situation is
# kept so that callers can filter on the fund status.
surrogate_dim_Investment_Fund = (
    pd.read_sql("SELECT id_dim_investment_fund, CNPJ, Situation FROM dim_Investment_Fund", conn)
    .set_index('CNPJ')
)

In [None]:
# Surrogate-key lookup for dim_Manager, indexed by CPF_CNPJManager.
surrogate_dim_Manager = (
    pd.read_sql("SELECT id_dim_manager, CPF_CNPJManager FROM dim_Manager", conn)
    .set_index('CPF_CNPJManager')
)

In [None]:
# Surrogate-key lookup for dim_Admin, indexed by CNPJAdmin.
surrogate_dim_Admin = (
    pd.read_sql("SELECT id_dim_admin, CNPJAdmin FROM dim_Admin", conn)
    .set_index('CNPJAdmin')
)

In [None]:
# Surrogate-key lookup for dim_Custodian, indexed by CNPJCustodian.
surrogate_dim_Custodian = (
    pd.read_sql("SELECT id_dim_custodian, CNPJCustodian FROM dim_Custodian", conn)
    .set_index('CNPJCustodian')
)

In [None]:
# Surrogate-key lookup for dim_Auditor, indexed by CNPJAuditor.
surrogate_dim_Auditor = (
    pd.read_sql("SELECT id_dim_auditor, CNPJAuditor FROM dim_Auditor", conn)
    .set_index('CNPJAuditor')
)

In [None]:
# Surrogate-key lookup for dim_Controller, indexed by CNPJController.
surrogate_dim_Controller = (
    pd.read_sql("SELECT id_dim_controller, CNPJController FROM dim_Controller", conn)
    .set_index('CNPJController')
)

In [None]:
def create_fact_Value(conn, cursor, cad):
    """Load fact_Value with one row per daily report of an active fund.

    Iterates over the module-level ``inf_diario`` frame and resolves surrogate
    keys through the module-level ``surrogate_dim_*`` frames.

    Args:
        conn: Open DB connection; committed after every inserted row.
        cursor: Cursor on conn used to execute the INSERT statements.
        cad: Registration DataFrame; the first row per CNPJ_FUNDO supplies the
            manager, admin, custodian, auditor and controller identifiers.

    A report is skipped when its CNPJ_FUNDO is falsy or when
    dim_Investment_Fund has no row for that CNPJ with Situation
    'EM FUNCIONAMENTO NORMAL'.

    Raises:
        Exception: any error while processing a report is re-raised after the
            row's values are printed and the transaction is rolled back, which
            stops the load.
    """
    cad_columns = ['CPF_CNPJ_GESTOR', 'CNPJ_ADMIN', 'CNPJ_CUSTODIANTE',
                   'CNPJ_AUDITOR', 'CNPJ_CONTROLADOR']

    # Loop-invariant lookup built once: first registration row per fund.
    # Scanning cad for every report row made the load quadratic.
    first_registrations = (
        cad[['CNPJ_FUNDO'] + cad_columns]
        .dropna(subset=['CNPJ_FUNDO'])
        .drop_duplicates(subset='CNPJ_FUNDO')
    )
    cad_by_fund = {
        fund_cnpj: related
        for fund_cnpj, *related in first_registrations.itertuples(index=False, name=None)
    }

    # Loop-invariant lookup built once: surrogate id (first selected column)
    # of the first active dimension row per CNPJ.
    active_fund_ids = surrogate_dim_Investment_Fund[
        surrogate_dim_Investment_Fund['Situation'] == 'EM FUNCIONAMENTO NORMAL'
    ].iloc[:, 0]
    fund_id_by_cnpj = active_fund_ids[~active_fund_ids.index.duplicated(keep='first')].to_dict()

    def _surrogate_id(surrogate, key, column):
        # A falsy key (None, empty) means the fund has no such participant.
        return int(surrogate.loc[key, column]) if key else None

    command = ("""
        INSERT INTO fact_Value (
            id_dim_time,
            id_dim_investment_fund,
            id_dim_manager,
            id_dim_admin,
            id_dim_custodian,
            id_dim_auditor,
            id_dim_controller,
            NetWorth,
            Total,
            Quota,
            NQuotaHolders,
            CaptDay,
            RescueDay)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """)

    i = 0

    for _, row in inf_diario.iterrows():

        # Reset per row so the diagnostic print in the except block never
        # touches an unbound name or a stale value from the previous row.
        cnpj = id_dim_time = id_dim_investment_fund = None
        id_dim_manager = id_dim_admin = id_dim_custodian = None
        id_dim_auditor = id_dim_controller = None
        NetWorth = Total = Quota = NQuotaHolders = CaptDay = RescueDay = None

        try:

            cnpj = row['CNPJ_FUNDO']

            if not cnpj:
                continue

            # Raises KeyError (and stops the load) when the fund is absent from cad.
            (CPF_CNPJ_Manager, CNPJ_Admin, CNPJ_Custodian,
             CNPJ_Auditor, CNPJ_Controller) = (tratar_nan(value) for value in cad_by_fund[cnpj])

            date = datetime.strptime(row['DT_COMPTC'], '%Y-%m-%d').date()

            id_dim_time = surrogate_dim_Time.loc[date, 'id_dim_time']

            id_dim_investment_fund = fund_id_by_cnpj.get(cnpj)
            if id_dim_investment_fund is None:
                # No active dimension row for this fund: skip the report.
                continue

            id_dim_manager = _surrogate_id(surrogate_dim_Manager, CPF_CNPJ_Manager, 'id_dim_manager')
            id_dim_admin = _surrogate_id(surrogate_dim_Admin, CNPJ_Admin, 'id_dim_admin')
            id_dim_custodian = _surrogate_id(surrogate_dim_Custodian, CNPJ_Custodian, 'id_dim_custodian')
            id_dim_auditor = _surrogate_id(surrogate_dim_Auditor, CNPJ_Auditor, 'id_dim_auditor')
            id_dim_controller = _surrogate_id(surrogate_dim_Controller, CNPJ_Controller, 'id_dim_controller')

            # Measures are read by column position, as before, but through
            # .iloc so it does not depend on integer-key fallback on a
            # label-indexed Series.
            # NOTE(review): confirm the positions against the CSV header and
            # consider switching to column labels.
            NetWorth = tratar_nan(row.iloc[5])
            Total = tratar_nan(row.iloc[3])
            Quota = tratar_nan(row.iloc[4])
            NQuotaHolders = tratar_nan(row.iloc[8])
            CaptDay = tratar_nan(row.iloc[6])
            RescueDay = tratar_nan(row.iloc[7])

            values = (
                int(id_dim_time),
                int(id_dim_investment_fund),
                id_dim_manager,
                id_dim_admin,
                id_dim_custodian,
                id_dim_auditor,
                id_dim_controller,
                NetWorth,
                Total,
                Quota,
                NQuotaHolders,
                CaptDay,
                RescueDay
            )

            cursor.execute(command, values)
            conn.commit()

            # Report progress periodically instead of printing one line per row.
            i += 1
            if i % 10_000 == 0:
                print(i)

        except Exception:
            print(f"""
                    cnpj: {cnpj}
                    id_dim_time: {id_dim_time}
                    id_dim_investment_fund: {id_dim_investment_fund}
                    id_dim_manager: {id_dim_manager}
                    id_dim_admin: {id_dim_admin}
                    id_dim_custodian: {id_dim_custodian}
                    id_dim_auditor: {id_dim_auditor}
                    id_dim_controller: {id_dim_controller}
                    NetWorth: {NetWorth}
                    Total: {Total}
                    Quota: {Quota}
                    NQuotaHolders: {NQuotaHolders}
                    CaptDay: {CaptDay}
                    RescueDay: {RescueDay}
                """
            )

            conn.rollback()
            # Re-raise with the original traceback; this ends the load.
            raise

In [None]:
# Populate fact_Value from the daily reports (reads the global inf_diario and surrogate_dim_* frames).
create_fact_Value(conn, cursor, cad)

## Data analytics

In [None]:
# Switch the cursor's session to the FI_DW database for the statements that follow.
cursor.execute('USE [FI_DW]')