# Initialize MongoDB client
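See README.md for setup instructions. The connection cell below reads the database password from the `MONGODB_PASSWORD` environment variable, so set it before running the notebook.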

In [1]:
import os
from urllib.parse import quote_plus
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Build the connection URI. The password comes from the environment so that no
# credential is stored in the notebook itself.
raw_password = os.environ.get('MONGODB_PASSWORD')
if raw_password is None:
    # Fail fast with an actionable message: quote_plus(None) would otherwise
    # raise a TypeError that does not mention the missing variable.
    raise RuntimeError(
        "The MONGODB_PASSWORD environment variable is not set; "
        "see README.md for setup instructions."
    )

# Percent-encode both credentials so special characters cannot break the URI.
username = quote_plus('common')
password = quote_plus(raw_password)
uri = f"mongodb+srv://{username}:{password}@playervaluations.v7jevdf.mongodb.net/?retryWrites=true&w=majority"
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
# Send a ping to confirm a successful connection; a failure is reported by
# printing the exception rather than aborting the cell.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [2]:
import json

# Fetch one document from the `players` collection by `player_id` and print it.
# The id lives in a single variable so the query and the "not found" message
# can never disagree.
target_player_id = 10

db = client['player_valuations']
collection = db['players']
player = collection.find_one({'player_id': target_player_id})

# Print the result
if player:
    # default=str stringifies any value json cannot encode natively (the saved
    # output shows `_id` rendered as a plain string).
    print("Player found:", json.dumps(player, indent=4, default=str))
else:
    print("No player found with player_id", target_player_id)

Player found: {
    "_id": "65a01328c758554bfe6e0d2f",
    "player_id": 10,
    "first_name": "Miroslav",
    "last_name": "Klose",
    "name": "Miroslav Klose",
    "last_season": 2015,
    "current_club_id": 398,
    "player_code": "miroslav-klose",
    "country_of_birth": "Poland",
    "city_of_birth": "Opole",
    "country_of_citizenship": "Germany",
    "date_of_birth": "1978-06-09",
    "sub_position": "Centre-Forward",
    "position": "Attack",
    "foot": "right",
    "height_in_cm": 184.0,
    "contract_expiration_date": NaN,
    "agent_name": "ASBW Sport Marketing",
    "image_url": "https://img.a.transfermarkt.technology/portrait/header/10-1448468291.jpg?lm=1",
    "url": "https://www.transfermarkt.co.uk/miroslav-klose/profil/spieler/10",
    "current_club_domestic_competition_id": "IT1",
    "current_club_name": "Societ\u00e0 Sportiva Lazio S.p.A.",
    "market_value_in_eur": 1000000.0,
    "highest_market_value_in_eur": 30000000.0,
    "valuations": [
        {
           

In [3]:
import pandas as pd

# Load every document of the `players` collection into a pandas DataFrame.
db = client['player_valuations']
collection = db['players']

# Exclude `_id` through the query projection instead of dropping the column
# afterwards: the resulting frame has the same columns, but this neither
# mutates the frame in place nor raises KeyError when the collection is empty
# (an empty result has no `_id` column to drop).
res = collection.find({}, {'_id': 0})

df = pd.DataFrame(list(res))

In [8]:
from pyspark.sql import SparkSession

# Directory holding the raw input files, relative to the notebook's working
# directory. Forward slashes keep the paths valid on Linux/macOS as well as
# Windows (the previous backslash-separated paths only resolved on Windows).
TRANSFERMARKT_DIR = "raw_data/transfermarkt"

ss = SparkSession.builder.master("local[*]").getOrCreate()

# try/finally guarantees the local Spark session is stopped even when one of
# the reads below fails.
# NOTE(review): the Spark DataFrames created here are bound to this session, so
# they presumably cannot be used by later cells once it is stopped — confirm
# that stopping in this cell is intended.
try:
    player_valuation_df = ss.createDataFrame(df)
    appearances_df = ss.read.csv(f"{TRANSFERMARKT_DIR}/appearances.csv", header=True, inferSchema=True)
    # multiline=true: the JSON file is read as multi-line JSON rather than one
    # record per line.
    players_df = ss.read.option("multiline", "true").json(f"{TRANSFERMARKT_DIR}/players.json")
    players_df.show(1)
finally:
    ss.stop()

  if should_localize and is_datetime64tz_dtype(s.dtype) and s.dt.tz is not None:


+--------------------+-------------+------------------------+----------------+----------------------+------------------------------------+---------------+--------------------+-------------+----------+-----+------------+---------------------------+--------------------+---------+-----------+-------------------+--------------+--------------+---------+--------+--------------+--------------------+
|          agent_name|city_of_birth|contract_expiration_date|country_of_birth|country_of_citizenship|current_club_domestic_competition_id|current_club_id|   current_club_name|date_of_birth|first_name| foot|height_in_cm|highest_market_value_in_eur|           image_url|last_name|last_season|market_value_in_eur|          name|   player_code|player_id|position|  sub_position|                 url|
+--------------------+-------------+------------------------+----------------+----------------------+------------------------------------+---------------+--------------------+-------------+----------+-----+--

# Close MongoDB client

In [5]:
# Close the MongoClient that was created in the first cell.
client.close()