### Which country has produced the most successful tennis players?


In [9]:
import os
from dotenv import load_dotenv, find_dotenv
from tennis_data_analysis.db.mysql import Database
import pandas as pd

# Find the project's .env file and load its entries into the process environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Environment variables holding the connection settings, listed in the
# positional order in which they are handed to Database.
_DB_ENV_KEYS = ('DB_HOST', 'DB_USERNAME', 'DB_PASSWORD', 'DB_DATABASE', 'DB_PORT')

# Any variable that is not set is passed through as None (os.environ.get default).
# NOTE(review): the trailing True is a positional flag whose meaning is defined
# by Database — confirm against tennis_data_analysis.db.mysql.
mysql = Database(*(os.environ.get(key) for key in _DB_ENV_KEYS), True)


## SQL Solution

#### Find the best countries based on number of wins
Top 10 countries by total number of match wins

In [10]:
# Top 10 countries ranked by the total number of matches won by their players.
#
# Each GameInfoResult row is joined to its winning player through the player id
# and the wins are counted per country in a single pass. Joining on the id
# (instead of re-joining Players through the slug of a per-player subquery)
# keeps the count correct even if two players share a slug or a slug is NULL,
# and avoids sorting an intermediate result whose order is never used.
sql = """
SELECT p.country, COUNT(*) AS country_wins
FROM GameInfoResult AS r
JOIN Players AS p ON p.id = r.winner_player_id
WHERE r.winner_player_id != 0
GROUP BY p.country
ORDER BY country_wins DESC
LIMIT 10
"""
# NOTE(review): winner_player_id = 0 presumably marks a game without a recorded
# winner — confirm against the schema.
mysql.db.execute(sql)
result = mysql.db.fetchall()

# Rows are read by column name; print one "country: wins" line per country.
if result:
    for row in result:
        print(f"{row['country']}: {row['country_wins']}")
else:
    print("Not Found")


USA: 468
Italy: 453
France: 393
Japan: 318
Germany: 280
Russia: 267
Spain: 207
Argentina: 196
China: 195
Australia: 189


#### Find the best countries based on the average rank of their players, for countries with at least 50 players (counting only players with `current_rank > 0`)

~~~sql
SELECT p.country, AVG(current_rank) AS avg_rank
FROM Players AS p
WHERE p.current_rank > 0
GROUP BY p.country
HAVING COUNT(p.country) >= 50
ORDER BY avg_rank ASC
~~~

In [16]:
# NOTE(review): this import would ideally live in the notebook's first import
# cell; it is kept here so the cell still runs on its own.
import tabulate

# Average current rank per country, lowest (best) average first.
# Only players with current_rank > 0 are considered, and only countries with
# at least 50 such players are kept.
sql = """
SELECT p.country, AVG(p.current_rank) AS avg_rank, COUNT(p.id) AS country_player_count
FROM Players AS p
WHERE p.current_rank > 0
GROUP BY p.country
HAVING country_player_count >= 50
ORDER BY avg_rank ASC
"""
mysql.db.execute(sql)
result = mysql.db.fetchall()

if result:
    # Build the display rows under their own name so `result` keeps holding the
    # raw database rows; the first column is a 1-based position in the ranking.
    ranking_rows = [
        [position, row['country'], row['avg_rank'], row['country_player_count']]
        for position, row in enumerate(result, start=1)
    ]
    print(
        tabulate.tabulate(
            ranking_rows,
            headers=['#', 'Country', 'Rank Average', 'Count of players'],
            tablefmt='outline',
            stralign="left",
            numalign="center",
        )
    )
else:
    print("Not Found")


+-----+----------------+---------------+--------------------+
|  #  | Country        |  Rank Averag  |  Count of players  |
|  1  | Australia      |    547.444    |         72         |
|  2  | Czech Republic |    568.833    |         60         |
|  3  | France         |    572.55     |        151         |
|  4  | Argentina      |    625.962    |         78         |
|  5  | Russia         |    639.958    |        118         |
|  6  | Spain          |    661.614    |         88         |
|  7  | USA            |    673.544    |        204         |
|  8  | Germany        |    702.645    |        107         |
|  9  | United Kingdom |    707.431    |         65         |
| 10  | China          |    711.915    |         71         |
| 11  | Japan          |    758.093    |        118         |
| 12  | Italy          |    781.258    |        178         |
| 13  | Romania        |    800.533    |         60         |
| 14  | Brazil         |    809.519    |         52         |
+-----+-