In [1]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

# Create the notebook's Spark session, or reuse one that is already running.
# With eager evaluation enabled, a DataFrame left as the last expression of a
# cell is rendered as a table without an explicit .show().
spark = (
    SparkSession.builder
    .appName("pyspark_aula_12")
    .config("spark.sql.repl.eagerEval.enabled", True)
    .getOrCreate()
)

In [None]:
# Source dataset used by the cells below; the path is relative to the
# notebook's working directory.
LOGINS_PATH = "./bases/LOGINS.parquet"
df = spark.read.parquet(LOGINS_PATH)

# Filtrando com PySpark 

In [5]:
# Keep only the rows whose `estado` is PB, PE or AL and preview five of them.
(
    df
    .where(F.col("estado").isin(["PB", "PE", "AL"]))
    .limit(5)
)

cpf,email,senha,data_de_nascimento,estado,data_cadastro,ipv4,cor_favorita,profissao,telefone
875.130.249-79,da-rocharebeca@ho...,FlOoH0I6)6,1972-10-05,PE,2023-02-11,126.171.28.31,Violeta,Modelista,+55 84 1130-9822
047.528.369-47,almeidaevelyn@gma...,@8JmQG4t@x,1980-11-13,AL,2023-02-15,92.65.10.245,Marrom,Paisagista,+55 (084) 8760-9088
089.462.357-56,vitoria83@ig.com.br,(8gXQeIeA3,2014-11-18,AL,2023-02-24,182.161.33.191,Marrom,Oculista,+55 (084) 8655 2371
308.942.751-60,hpires@gmail.com,#^yq9HIt0h,1996-03-08,PE,2023-02-14,139.209.144.207,Ciano,Estofador,+55 31 1784 8068
149.857.260-01,mda-costa@hotmail...,4hf6EYFb$m,2005-08-07,AL,2023-01-08,101.34.19.246,Rosa,Gagsman,0800 183 2070


# Filtrando com SQL

In [11]:
# Same filter as the DataFrame-API cell, written in SQL. The `{tabela}`
# placeholder is bound to the DataFrame passed as the `tabela` keyword
# argument, so no temporary view has to be registered first.
consulta_estados = '''
    select * 
    from {tabela}
    where estado in ('PB', 'PE', 'AL')
    '''
spark.sql(consulta_estados, tabela=df).limit(5)

cpf,email,senha,data_de_nascimento,estado,data_cadastro,ipv4,cor_favorita,profissao,telefone
875.130.249-79,da-rocharebeca@ho...,FlOoH0I6)6,1972-10-05,PE,2023-02-11,126.171.28.31,Violeta,Modelista,+55 84 1130-9822
047.528.369-47,almeidaevelyn@gma...,@8JmQG4t@x,1980-11-13,AL,2023-02-15,92.65.10.245,Marrom,Paisagista,+55 (084) 8760-9088
089.462.357-56,vitoria83@ig.com.br,(8gXQeIeA3,2014-11-18,AL,2023-02-24,182.161.33.191,Marrom,Oculista,+55 (084) 8655 2371
308.942.751-60,hpires@gmail.com,#^yq9HIt0h,1996-03-08,PE,2023-02-14,139.209.144.207,Ciano,Estofador,+55 31 1784 8068
149.857.260-01,mda-costa@hotmail...,4hf6EYFb$m,2005-08-07,AL,2023-01-08,101.34.19.246,Rosa,Gagsman,0800 183 2070


#### Também é possível registrar o DataFrame como uma view temporária e consultá-lo no SQL pelo nome, sem precisar passá-lo como parâmetro

In [12]:
# One filtered DataFrame per state, unpacked in the same order as the tuple
# of state codes.
tabela_pb, tabela_pe, tabela_al = (
    df.where(F.col('estado') == uf) for uf in ('PB', 'PE', 'AL')
)

In [13]:
# Expose each per-state DataFrame to Spark SQL under a view name that matches
# its variable name. createOrReplaceTempView is the replacement for
# registerTempTable, which has been deprecated since Spark 2.0. It overwrites
# an existing view of the same name, so this cell can be re-run safely.
tabela_pb.createOrReplaceTempView('tabela_pb')
tabela_pe.createOrReplaceTempView('tabela_pe')
tabela_al.createOrReplaceTempView('tabela_al')



In [14]:
# Query the temporary view by name; no DataFrame has to be passed to
# spark.sql because `tabela_pb` is resolved from the session's registered views.
consulta_pb = '''
    select * 
    from tabela_pb
    '''
spark.sql(consulta_pb)

cpf,email,senha,data_de_nascimento,estado,data_cadastro,ipv4,cor_favorita,profissao,telefone
023.965.814-06,daniela34@ig.com.br,a6erVAf%)9,1924-10-14,PB,2023-01-03,204.207.148.123,Amarelo,Ortodontista,+55 31 5756 9535
189.462.350-98,onunes@hotmail.com,pIKUqi2)!5,1951-05-15,PB,2023-01-11,198.91.114.111,Laranja,Político,0800-590-6357
279.430.816-50,gmartins@bol.com.br,_d3VPWjvc2,1941-07-24,PB,2023-01-27,167.248.54.174,Azul Escuro,Tecnólogo Em Proc...,+55 21 0579-4147
518.490.623-15,ana08@yahoo.com.br,Dg9epjTlI),2003-10-20,PB,2023-02-02,104.37.135.232,Marrom,Skatista,+55 (021) 4307 4419
162.047.598-76,stephany89@hotmai...,t%^1USz$DN,1944-06-16,PB,2023-02-01,68.193.32.227,Magenta,Cartunista,(071) 3336 6068
785.469.302-10,martinsbeatriz@uo...,tn0qTo9$&N,1969-09-24,PB,2023-01-05,123.135.60.13,Verde Claro,Copeiro,71 0790-7505
672.549.308-47,sarah76@bol.com.br,V^a2T8%p)$,1912-03-17,PB,2023-03-03,4.68.100.185,Amarelo,Tecnólogo Em Proc...,51 3990 9542
285.609.317-59,claricerocha@gmai...,aT@4HKEkUI,1964-11-21,PB,2023-03-14,29.171.64.135,Laranja,Peão De Rodeiro,+55 11 6134-3382
295.031.768-59,piresenzo-gabriel...,I2!Te0r_#P,1958-03-28,PB,2023-01-10,94.51.224.152,Amarelo,Espeleologista,0500-415-3076
968.031.572-02,correialeticia@ho...,F+0Gl&gOLj,2015-02-19,PB,2023-01-28,137.121.8.37,Ciano,Psiquiatra,61 3171 0390


In [16]:
# Stack the three per-state views with UNION ALL (duplicates are kept).
# The trailing `order by` sorts the combined result, not only the last
# select: the output starts with AL rows even though tabela_pb comes first.
consulta_uniao = '''
    select * 
    from tabela_pb
    UNION ALL
    select * 
    from tabela_pe
    UNION ALL
    select * 
    from tabela_al order by estado asc
    '''
spark.sql(consulta_uniao)

cpf,email,senha,data_de_nascimento,estado,data_cadastro,ipv4,cor_favorita,profissao,telefone
047.528.369-47,almeidaevelyn@gma...,@8JmQG4t@x,1980-11-13,AL,2023-02-15,92.65.10.245,Marrom,Paisagista,+55 (084) 8760-9088
089.462.357-56,vitoria83@ig.com.br,(8gXQeIeA3,2014-11-18,AL,2023-02-24,182.161.33.191,Marrom,Oculista,+55 (084) 8655 2371
149.857.260-01,mda-costa@hotmail...,4hf6EYFb$m,2005-08-07,AL,2023-01-08,101.34.19.246,Rosa,Gagsman,0800 183 2070
492.568.703-47,ksouza@bol.com.br,+)az^0NfXB,1976-09-11,AL,2023-02-14,50.116.252.134,Azul Escuro,Contramestre Em T...,(081) 7077 2234
874.601.952-94,da-motavitor@bol....,o$7WpLQxwv,1995-04-25,AL,2023-02-01,94.25.133.33,Amarelo,Conferente De Rec...,0900-325-7445
657.214.809-20,ericklopes@yahoo....,XbRGy2Rl&A,1958-01-09,AL,2023-02-13,202.177.238.170,Amarelo,Engenheiro De Ilu...,(084) 7938 6983
325.790.861-02,cauamendes@gmail.com,O5!x0OjlfW,2011-05-30,AL,2023-01-08,39.75.199.7,Azul,Profissional De H...,+55 (084) 5929-0843
416.805.379-20,giovanna74@hotmai...,%4P39oUloo,1978-08-17,AL,2023-01-31,208.222.188.39,Verde Escuro,Contatólogo,(011) 2667 4246
164.352.978-19,vianavitor-gabrie...,$6mSRa!s%S,1990-11-06,AL,2023-03-08,17.3.19.99,Verde Escuro,Investigador Part...,+55 (021) 8237-1575
517.830.694-57,laura27@uol.com.br,&s6Hqc2EfK,1975-06-19,AL,2023-01-03,71.196.222.150,Azul Escuro,Jogador De Handebol,+55 31 1910-4434
