In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

In [2]:
# Create (or reuse) the SparkSession for this notebook: local master,
# application name 'df_fifa_world_cup_2'.
spark = (
    SparkSession.builder
    .master('local')
    .appName('df_fifa_world_cup_2')
    .getOrCreate()
)

In [3]:
# Load the players CSV, taking the column names from its first line.
# No schema is supplied or inferred, so every column is read as a string.
df = spark.read.option('header', True).csv('arquivo/wc2018-players.csv')

In [4]:
# Print the column names, types and nullability of the loaded DataFrame.
df.printSchema()

root
 |-- Team: string (nullable = true)
 |-- #: string (nullable = true)
 |-- Pos.: string (nullable = true)
 |-- FIFA Popular Name: string (nullable = true)
 |-- Birth Date: string (nullable = true)
 |-- Shirt Name: string (nullable = true)
 |-- Club: string (nullable = true)
 |-- Height: string (nullable = true)
 |-- Weight: string (nullable = true)



In [6]:
# Preview the first five rows as loaded from the CSV.
df.show(n=5)

+---------+---+----+------------------+----------+----------+--------------------+------+------+
|     Team|  #|Pos.| FIFA Popular Name|Birth Date|Shirt Name|                Club|Height|Weight|
+---------+---+----+------------------+----------+----------+--------------------+------+------+
|Argentina|  3|  DF|TAGLIAFICO Nicolas|31.08.1992|TAGLIAFICO|      AFC Ajax (NED)|   169|    65|
|Argentina| 22|  MF|    PAVON Cristian|21.01.1996|     PAVÓN|CA Boca Juniors (...|   169|    65|
|Argentina| 15|  MF|    LANZINI Manuel|15.02.1993|   LANZINI|West Ham United F...|   167|    66|
|Argentina| 18|  DF|    SALVIO Eduardo|13.07.1990|    SALVIO|    SL Benfica (POR)|   167|    69|
|Argentina| 10|  FW|      MESSI Lionel|24.06.1987|     MESSI|  FC Barcelona (ESP)|   170|    72|
+---------+---+----+------------------+----------+----------+--------------------+------+------+
only showing top 5 rows



In [9]:
# Rename the CSV headers to the Portuguese column names used in the rest of
# the notebook, then give the two measurement columns a numeric type.
#
# The file is read without a schema, so Altura and Peso arrive as strings.
# Sorting or taking max/min on strings compares them character by character
# (e.g. '99' would sort above '100'), so they are cast to integers here, once,
# before any ordering or aggregation uses them.
df = (
    df.withColumnRenamed('Team', 'Selecao')
      .withColumnRenamed('#', 'Numero')
      .withColumnRenamed('Pos.', 'Posicao')
      .withColumnRenamed('FIFA Popular Name', 'Nome_FIFA')
      .withColumnRenamed('Birth Date', 'Nascimento')
      .withColumnRenamed('Shirt Name', 'Nome_Camiseta')
      .withColumnRenamed('Club', 'Time')
      .withColumnRenamed('Height', 'Altura')
      .withColumnRenamed('Weight', 'Peso')
      .withColumn('Altura', col('Altura').cast(IntegerType()))
      .withColumn('Peso', col('Peso').cast(IntegerType()))
)

df.show(5)

+---------+------+-------+------------------+----------+-------------+--------------------+------+----+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nascimento|Nome_Camiseta|                Time|Altura|Peso|
+---------+------+-------+------------------+----------+-------------+--------------------+------+----+
|Argentina|     3|     DF|TAGLIAFICO Nicolas|31.08.1992|   TAGLIAFICO|      AFC Ajax (NED)|   169|  65|
|Argentina|    22|     MF|    PAVON Cristian|21.01.1996|        PAVÓN|CA Boca Juniors (...|   169|  65|
|Argentina|    15|     MF|    LANZINI Manuel|15.02.1993|      LANZINI|West Ham United F...|   167|  66|
|Argentina|    18|     DF|    SALVIO Eduardo|13.07.1990|       SALVIO|    SL Benfica (POR)|   167|  69|
|Argentina|    10|     FW|      MESSI Lionel|24.06.1987|        MESSI|  FC Barcelona (ESP)|   170|  72|
+---------+------+-------+------------------+----------+-------------+--------------------+------+----+
only showing top 5 rows



In [13]:
# Cut the birth-date text into year, month and day by position, counting
# from the end of the string: the last 4 characters, then 2 characters
# starting 7 from the end, then 2 characters starting 10 from the end.
df = (
    df.withColumn('Ano', substring('Nascimento', -4, 4))
      .withColumn('Mes', substring('Nascimento', -7, 2))
      .withColumn('Dia', substring('Nascimento', -10, 2))
)

In [14]:
# Check the new Ano / Mes / Dia columns on the first five rows.
df.show(n=5)

+---------+------+-------+------------------+----------+-------------+--------------------+------+----+----+---+---+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nascimento|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|
+---------+------+-------+------------------+----------+-------------+--------------------+------+----+----+---+---+
|Argentina|     3|     DF|TAGLIAFICO Nicolas|31.08.1992|   TAGLIAFICO|      AFC Ajax (NED)|   169|  65|1992| 08| 31|
|Argentina|    22|     MF|    PAVON Cristian|21.01.1996|        PAVÓN|CA Boca Juniors (...|   169|  65|1996| 01| 21|
|Argentina|    15|     MF|    LANZINI Manuel|15.02.1993|      LANZINI|West Ham United F...|   167|  66|1993| 02| 15|
|Argentina|    18|     DF|    SALVIO Eduardo|13.07.1990|       SALVIO|    SL Benfica (POR)|   167|  69|1990| 07| 13|
|Argentina|    10|     FW|      MESSI Lionel|24.06.1987|        MESSI|  FC Barcelona (ESP)|   170|  72|1987| 06| 24|
+---------+------+-------+------------------+----------+--------

In [16]:
# Join the parts as 'Ano-Mes-Dia' and convert that text to a date column.
# to_date() without a format applies the same rules as a cast to DateType.
iso_birth_text = concat_ws('-', col('Ano'), col('Mes'), col('Dia'))
df = df.withColumn('Data_Nascimento', to_date(iso_birth_text))

In [18]:
# Check the new Data_Nascimento column on the first five rows.
df.show(n=5)

+---------+------+-------+------------------+----------+-------------+--------------------+------+----+----+---+---+---------------+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nascimento|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|
+---------+------+-------+------------------+----------+-------------+--------------------+------+----+----+---+---+---------------+
|Argentina|     3|     DF|TAGLIAFICO Nicolas|31.08.1992|   TAGLIAFICO|      AFC Ajax (NED)|   169|  65|1992| 08| 31|     1992-08-31|
|Argentina|    22|     MF|    PAVON Cristian|21.01.1996|        PAVÓN|CA Boca Juniors (...|   169|  65|1996| 01| 21|     1996-01-21|
|Argentina|    15|     MF|    LANZINI Manuel|15.02.1993|      LANZINI|West Ham United F...|   167|  66|1993| 02| 15|     1993-02-15|
|Argentina|    18|     DF|    SALVIO Eduardo|13.07.1990|       SALVIO|    SL Benfica (POR)|   167|  69|1990| 07| 13|     1990-07-13|
|Argentina|    10|     FW|      MESSI Lionel|24.06.1987|        MESSI

In [21]:
# Remove the original birth-date text column and preview the result.
df = df.drop('Nascimento')
df.show(n=5)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+
|Argentina|     3|     DF|TAGLIAFICO Nicolas|   TAGLIAFICO|      AFC Ajax (NED)|   169|  65|1992| 08| 31|     1992-08-31|
|Argentina|    22|     MF|    PAVON Cristian|        PAVÓN|CA Boca Juniors (...|   169|  65|1996| 01| 21|     1996-01-21|
|Argentina|    15|     MF|    LANZINI Manuel|      LANZINI|West Ham United F...|   167|  66|1993| 02| 15|     1993-02-15|
|Argentina|    18|     DF|    SALVIO Eduardo|       SALVIO|    SL Benfica (POR)|   167|  69|1990| 07| 13|     1990-07-13|
|Argentina|    10|     FW|      MESSI Lionel|        MESSI|  FC Barcelona (ESP)|   170|  72|1987| 06| 24|     1987-06-24|
+---------+------+------

In [23]:
# Bind a second name to the current DataFrame object (plain assignment, no
# copy is made). When `df` is reassigned later, `df2` keeps pointing at this
# version.
df2 = df

In [24]:
# Preview the first five rows of the current DataFrame.
df.show(n=5)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+
|Argentina|     3|     DF|TAGLIAFICO Nicolas|   TAGLIAFICO|      AFC Ajax (NED)|   169|  65|1992| 08| 31|     1992-08-31|
|Argentina|    22|     MF|    PAVON Cristian|        PAVÓN|CA Boca Juniors (...|   169|  65|1996| 01| 21|     1996-01-21|
|Argentina|    15|     MF|    LANZINI Manuel|      LANZINI|West Ham United F...|   167|  66|1993| 02| 15|     1993-02-15|
|Argentina|    18|     DF|    SALVIO Eduardo|       SALVIO|    SL Benfica (POR)|   167|  69|1990| 07| 13|     1990-07-13|
|Argentina|    10|     FW|      MESSI Lionel|        MESSI|  FC Barcelona (ESP)|   170|  72|1987| 06| 24|     1987-06-24|
+---------+------+------

In [28]:
# One window per Selecao, rows ordered by Altura descending.
num_linha = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# Append a sequential row number (1, 2, 3, ...) within each window and show
# the first 30 rows.
df.select('*', row_number().over(num_linha).alias('n')).show(n=30)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+---+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|  n|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+---+
|Argentina|     6|     DF|    FAZIO Federico|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|  1|
|Argentina|     1|     GK|     GUZMAN Nahuel|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|  2|
|Argentina|    16|     DF|       ROJO Marcos|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|  3|
|Argentina|    12|     GK|     ARMANI Franco|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|  4|
|Argentina|    23|     GK|CABALLERO Wilfredo|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981| 09| 28|     1981-09-

In [30]:
# One window per Selecao, rows ordered by Altura descending.
rank1 = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# rank(): rows with the same Altura share a rank, and the ranks that
# follow a tie are skipped (1, 2, 3, 3, 5, ...).
df.select('*', rank().over(rank1).alias('rank')).show(n=24)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+----+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|rank|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+----+
|Argentina|     6|     DF|    FAZIO Federico|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|   1|
|Argentina|     1|     GK|     GUZMAN Nahuel|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|   2|
|Argentina|    16|     DF|       ROJO Marcos|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|   3|
|Argentina|    12|     GK|     ARMANI Franco|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|   3|
|Argentina|    23|     GK|CABALLERO Wilfredo|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981| 09| 28|     1

In [32]:
# One window per Selecao, rows ordered by Altura descending.
rank2 = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# dense_rank(): rows with the same Altura share a rank and no rank is
# skipped after a tie (1, 2, 3, 3, 4, ...).
df.select('*', dense_rank().over(rank2).alias('rank2')).show(n=25)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+-----+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|rank2|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+-----+
|Argentina|     6|     DF|    FAZIO Federico|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|    1|
|Argentina|     1|     GK|     GUZMAN Nahuel|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|    2|
|Argentina|    16|     DF|       ROJO Marcos|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|    3|
|Argentina|    12|     GK|     ARMANI Franco|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|    3|
|Argentina|    23|     GK|CABALLERO Wilfredo|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981| 09| 28

In [35]:
# One window per Selecao, rows ordered by Altura descending.
porcentagem = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# percent_rank(): relative rank of each row inside its window, from 0.0 for
# the first row up to 1.0, i.e. (rank - 1) / (rows in the window - 1).
df.select('*', percent_rank().over(porcentagem).alias('%')).show(n=50)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+--------------------+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|                   %|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+--------------------+
|Argentina|     6|     DF|    FAZIO Federico|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|                 0.0|
|Argentina|     1|     GK|     GUZMAN Nahuel|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|0.045454545454545456|
|Argentina|    16|     DF|       ROJO Marcos|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20| 0.09090909090909091|
|Argentina|    12|     GK|     ARMANI Franco|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16| 0.09090909090909091|

In [39]:
# One window per Selecao, rows ordered by Altura descending.
parte = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# ntile(5): split the ordered rows of each window into 5 buckets and label
# every row with its bucket number (1 to 5).
df.select('*', ntile(5).over(parte).alias('partes')).show(n=23)

+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+------+
|  Selecao|Numero|Posicao|         Nome_FIFA|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|partes|
+---------+------+-------+------------------+-------------+--------------------+------+----+----+---+---+---------------+------+
|Argentina|     6|     DF|    FAZIO Federico|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|     1|
|Argentina|     1|     GK|     GUZMAN Nahuel|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|     1|
|Argentina|    16|     DF|       ROJO Marcos|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|     1|
|Argentina|    12|     GK|     ARMANI Franco|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|     1|
|Argentina|    23|     GK|CABALLERO Wilfredo|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981|

In [41]:
# Remove the Nome_FIFA column; from here on `df` no longer carries it.
df = df.drop('Nome_FIFA')

In [43]:
# One window per Selecao, rows ordered by Altura descending.
degrau = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# lag('Peso'): for each row, the Peso of the row immediately before it in
# the window; the first row of every window has no predecessor and gets NULL.
df.select('*', lag('Peso').over(degrau).alias('degrau')).show(n=23)

+---------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+------+
|  Selecao|Numero|Posicao|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|degrau|
+---------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+------+
|Argentina|     6|     DF|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|  NULL|
|Argentina|     1|     GK|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|    85|
|Argentina|    16|     DF|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|    90|
|Argentina|    12|     GK|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|    82|
|Argentina|    23|     GK|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981| 09| 28|     1981-09-28|    85|
|Argentina|     9|     FW|      HIGUAÍN|   Juventus FC (ITA)|   184|  75|1987| 12| 10|     1987-12-10|    80|
|Argentina

In [55]:
# One window per Selecao, rows ordered by Altura descending.
degrau = Window.partitionBy('Selecao').orderBy(col('Altura').desc())

# lead('Altura'): for each row, the Altura of the row immediately after it
# in the window.
df.select('*', lead('Altura').over(degrau).alias('degrau')).show(n=23)

+---------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+------+
|  Selecao|Numero|Posicao|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|degrau|
+---------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+------+
|Argentina|     6|     DF|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|   192|
|Argentina|     1|     GK|       GUZMÁN|   Tigres UANL (MEX)|   192|  90|1986| 02| 10|     1986-02-10|   189|
|Argentina|    16|     DF|         ROJO|Manchester United...|   189|  82|1990| 03| 20|     1990-03-20|   189|
|Argentina|    12|     GK|       ARMANI|CA River Plate (ARG)|   189|  85|1986| 10| 16|     1986-10-16|   186|
|Argentina|    23|     GK|    CABALLERO|    Chelsea FC (ENG)|   186|  80|1981| 09| 28|     1981-09-28|   184|
|Argentina|     9|     FW|      HIGUAÍN|   Juventus FC (ITA)|   184|  75|1987| 12| 10|     1987-12-10|   181|
|Argentina

In [60]:
# Average Altura per Selecao (dict-style aggregation), highest average first.
# show() with no argument prints the default 20 rows.
(
    df.groupBy('Selecao')
      .agg({'Altura': 'avg'})
      .orderBy(desc('avg(Altura)'))
      .show()
)

+--------------+------------------+
|       Selecao|       avg(Altura)|
+--------------+------------------+
|        Serbia|186.69565217391303|
|       Denmark| 186.6086956521739|
|       Germany| 185.7826086956522|
|        Sweden| 185.7391304347826|
|       Iceland|185.52173913043478|
|       Belgium|185.34782608695653|
|       Croatia| 185.2608695652174|
|       Nigeria|184.52173913043478|
|       IR Iran|184.47826086956522|
|        Russia| 184.3913043478261|
|       Senegal|183.65217391304347|
|        France|183.30434782608697|
|        Poland|183.17391304347825|
|       Tunisia|183.08695652173913|
|   Switzerland|182.91304347826087|
|       England| 182.7391304347826|
|       Morocco|182.69565217391303|
|        Panama|182.17391304347825|
|Korea Republic| 181.8695652173913|
|       Uruguay|181.04347826086956|
+--------------+------------------+
only showing top 20 rows



In [64]:
# Same per-Selecao average, written with the avg() column function and
# printing up to 32 rows.
(
    df.groupBy('Selecao')
      .agg(avg('Altura'))
      .orderBy(col('avg(Altura)').desc())
      .show(n=32)
)

+--------------+------------------+
|       Selecao|       avg(Altura)|
+--------------+------------------+
|        Serbia|186.69565217391303|
|       Denmark| 186.6086956521739|
|       Germany| 185.7826086956522|
|        Sweden| 185.7391304347826|
|       Iceland|185.52173913043478|
|       Belgium|185.34782608695653|
|       Croatia| 185.2608695652174|
|       Nigeria|184.52173913043478|
|       IR Iran|184.47826086956522|
|        Russia| 184.3913043478261|
|       Senegal|183.65217391304347|
|        France|183.30434782608697|
|        Poland|183.17391304347825|
|       Tunisia|183.08695652173913|
|   Switzerland|182.91304347826087|
|       England| 182.7391304347826|
|       Morocco|182.69565217391303|
|        Panama|182.17391304347825|
|Korea Republic| 181.8695652173913|
|       Uruguay|181.04347826086956|
|         Egypt|             181.0|
|     Australia| 180.8695652173913|
|        Brazil| 180.7826086956522|
|      Colombia| 180.7826086956522|
|    Costa Rica|180.69565217

In [66]:
# Keep only the rows whose Selecao is Brazil (filter is the same operation
# as where) and show them.
df.filter('Selecao = "Brazil"').show()

+-------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+
|Selecao|Numero|Posicao|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|
+-------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+
| Brazil|    18|     MF|         FRED|FC Shakhtar Donet...|   169|  64|1993| 03| 05|     1993-03-05|
| Brazil|    21|     FW|       TAISON|FC Shakhtar Donet...|   172|  64|1988| 01| 13|     1988-01-13|
| Brazil|    17|     MF|  FERNANDINHO|Manchester City F...|   179|  67|1985| 05| 04|     1985-05-04|
| Brazil|    22|     DF|       FAGNER|SC Corinthians (BRA)|   168|  67|1989| 06| 11|     1989-06-11|
| Brazil|    10|     FW|    NEYMAR JR|Paris Saint-Germa...|   175|  68|1992| 02| 05|     1992-02-05|
| Brazil|    11|     MF|  P. COUTINHO|  FC Barcelona (ESP)|   172|  68|1992| 06| 12|     1992-06-12|
| Brazil|     7|     FW|     D. COSTA|   Juventus FC (ITA)|   182|  70|1990| 09| 14|     19

In [68]:
# Rows for Brazil whose Posicao is FW: two predicates applied one after the
# other, so both must hold.
(
    df.filter('Selecao = "Brazil"')
      .filter('Posicao = "FW"')
      .show()
)

+-------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+
|Selecao|Numero|Posicao|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|
+-------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+
| Brazil|    21|     FW|       TAISON|FC Shakhtar Donet...|   172|  64|1988| 01| 13|     1988-01-13|
| Brazil|    10|     FW|    NEYMAR JR|Paris Saint-Germa...|   175|  68|1992| 02| 05|     1992-02-05|
| Brazil|     7|     FW|     D. COSTA|   Juventus FC (ITA)|   182|  70|1990| 09| 14|     1990-09-14|
| Brazil|     9|     FW|     G. JESUS|Manchester City F...|   175|  73|1997| 04| 03|     1997-04-03|
| Brazil|    20|     FW|      FIRMINO|  Liverpool FC (ENG)|   181|  76|1991| 10| 02|     1991-10-02|
+-------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+



In [72]:
# One window per Selecao, rows ordered by Altura descending.
top1 = Window.partitionBy('Selecao').orderBy(desc('Altura'))

# Number the rows inside each window and keep only row 1, i.e. one row per
# Selecao: the first one in descending Altura order.
# row_number() produces integers, so the filter compares against the integer
# 1 with a typed column expression instead of the SQL text 'Top = "1"', which
# compared the number to a string and relied on implicit coercion.
# NOTE: if several rows tie on the highest Altura, this ordering alone does
# not decide which of them receives number 1.
df.withColumn('Top', row_number().over(top1)).filter(col('Top') == 1).show()

+--------------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+---+
|       Selecao|Numero|Posicao|Nome_Camiseta|                Time|Altura|Peso| Ano|Mes|Dia|Data_Nascimento|Top|
+--------------+------+-------+-------------+--------------------+------+----+----+---+---+---------------+---+
|     Argentina|     6|     DF|        FAZIO|       AS Roma (ITA)|   199|  85|1987| 03| 17|     1987-03-17|  1|
|     Australia|    12|     GK|        JONES|Feyenoord Rotterd...|   193|  87|1982| 03| 19|     1982-03-19|  1|
|       Belgium|     1|     GK|     COURTOIS|    Chelsea FC (ENG)|   199|  91|1992| 05| 11|     1992-05-11|  1|
|        Brazil|    16|     GK|       CASSIO|SC Corinthians (BRA)|   195|  92|1987| 06| 06|     1987-06-06|  1|
|      Colombia|    13|     DF|      Y. MINA|  FC Barcelona (ESP)|   194|  95|1994| 09| 23|     1994-09-23|  1|
|    Costa Rica|    19|     DF|    K. WASTON|Vancouver Whiteca...|   196|  87|1988| 01| 01|     1988-01-

In [74]:
# Summary statistics (count, mean, stddev, ...) for the Brazil rows only.
df.filter('Selecao = "Brazil"').describe().show()

+-------+-------+-----------------+-------+-------------+--------------------+-----------------+-----------------+-----------------+-----------------+-----------------+
|summary|Selecao|           Numero|Posicao|Nome_Camiseta|                Time|           Altura|             Peso|              Ano|              Mes|              Dia|
+-------+-------+-----------------+-------+-------------+--------------------+-----------------+-----------------+-----------------+-----------------+-----------------+
|  count|     23|               23|     23|           23|                  23|               23|               23|               23|               23|               23|
|   mean|   NULL|             12.0|   NULL|         NULL|                NULL|180.7826086956522|76.56521739130434|1989.391304347826|6.130434782608695|11.26086956521739|
| stddev|   NULL|6.782329983125267|   NULL|         NULL|                NULL|7.354383490255254|8.239737898283606|3.499858833968506|2.784769418006175|6.876

In [81]:
# Ordered window: used only to number the rows of each Selecao so that a
# single row per team can be kept afterwards.
parametro = Window.partitionBy('Selecao').orderBy(desc('Altura'))
# Unordered window: aggregates over it see every row of the Selecao.
parametro2 = Window.partitionBy('Selecao')

# Per-team average, maximum and minimum Altura, one row per Selecao, sorted
# by the average in descending order.
# `max` and `min` here are the pyspark column functions brought in by the
# star import of pyspark.sql.functions, not the Python built-ins.
# row_number() produces integers, so the row is selected with a typed
# comparison against the integer 1 rather than the SQL text 'linhax = "1"',
# which compared the number to a string and relied on implicit coercion.
(
    df.withColumn('linhax', row_number().over(parametro))
      .withColumn('media', avg('Altura').over(parametro2))
      .withColumn('max', max('Altura').over(parametro2))
      .withColumn('min', min('Altura').over(parametro2))
      .filter(col('linhax') == 1)
      .select('Selecao', 'media', 'max', 'min')
      .orderBy('media', ascending=False)
      .show()
)

+--------------+------------------+---+---+
|       Selecao|             media|max|min|
+--------------+------------------+---+---+
|        Serbia|186.69565217391303|195|169|
|       Denmark| 186.6086956521739|200|171|
|       Germany| 185.7826086956522|195|176|
|        Sweden| 185.7391304347826|198|177|
|       Iceland|185.52173913043478|198|170|
|       Belgium|185.34782608695653|199|169|
|       Croatia| 185.2608695652174|201|172|
|       Nigeria|184.52173913043478|197|172|
|       IR Iran|184.47826086956522|194|177|
|        Russia| 184.3913043478261|196|173|
|       Senegal|183.65217391304347|196|173|
|        France|183.30434782608697|197|168|
|        Poland|183.17391304347825|195|172|
|       Tunisia|183.08695652173913|192|170|
|   Switzerland|182.91304347826087|192|165|
|       England| 182.7391304347826|196|170|
|       Morocco|182.69565217391303|190|167|
|        Panama|182.17391304347825|197|165|
|Korea Republic| 181.8695652173913|197|170|
|       Uruguay|181.043478260869