In [26]:
!pip3 install elasticsearch nba_api

Looking in indexes: http://mirrors.aliyun.com/pypi/simple/


In [50]:
from dotenv import load_dotenv
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder
from elasticsearch import Elasticsearch, helpers
import os


load_dotenv()
 
elastic_user=os.getenv('ES_USER')
elastic_password=os.getenv('ES_PASSWORD')

url = f"https://{elastic_user}:{elastic_password}@localhost:9200"
es = Elasticsearch(url, ca_certs = "./http_ca.crt", verify_certs = True)
 
print(es.info())

{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'h2QwONxsT4Kt-lTRKmPrhg', 'version': {'number': '8.12.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '1665f706fd9354802c02146c1e6b5c0fbcddfbc9', 'build_date': '2024-01-11T10:05:27.953830042Z', 'build_snapshot': False, 'lucene_version': '9.9.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [51]:
nba_teams = teams.get_teams()
celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
celtics_id = celtics['id']

In [52]:
celtics

{'id': 1610612738,
 'full_name': 'Boston Celtics',
 'abbreviation': 'BOS',
 'nickname': 'Celtics',
 'city': 'Boston',
 'state': 'Massachusetts',
 'year_founded': 1946}

In [53]:
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=celtics_id)
games = gamefinder.get_data_frames()[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612738,BOS,Boston Celtics,22300706,2024-02-04,BOS vs. MEM,W,242,131,...,0.889,17,40,57,30,12,4,10,13,40.0
1,22023,1610612738,BOS,Boston Celtics,22300684,2024-02-01,BOS vs. LAL,L,240,105,...,1.0,14,40,54,30,4,12,15,19,-9.0
2,22023,1610612738,BOS,Boston Celtics,22300670,2024-01-30,BOS vs. IND,W,240,129,...,0.818,7,33,40,25,6,9,10,16,5.0
3,22023,1610612738,BOS,Boston Celtics,22300659,2024-01-29,BOS vs. NOP,W,240,118,...,0.75,6,34,40,27,7,5,11,15,6.0
4,22023,1610612738,BOS,Boston Celtics,22300646,2024-01-27,BOS vs. LAC,L,240,96,...,0.875,18,34,52,21,2,9,11,13,-19.0


In [54]:
current_season = games.loc[(games['GAME_DATE'] >= '2023-10-24') & (games['GAME_DATE'] <= '2024-06-20')]
current_season

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612738,BOS,Boston Celtics,22300706,2024-02-04,BOS vs. MEM,W,242,131,...,0.889,17,40,57,30,12,4,10,13,40.0
1,22023,1610612738,BOS,Boston Celtics,22300684,2024-02-01,BOS vs. LAL,L,240,105,...,1.0,14,40,54,30,4,12,15,19,-9.0
2,22023,1610612738,BOS,Boston Celtics,22300670,2024-01-30,BOS vs. IND,W,240,129,...,0.818,7,33,40,25,6,9,10,16,5.0
3,22023,1610612738,BOS,Boston Celtics,22300659,2024-01-29,BOS vs. NOP,W,240,118,...,0.75,6,34,40,27,7,5,11,15,6.0
4,22023,1610612738,BOS,Boston Celtics,22300646,2024-01-27,BOS vs. LAC,L,240,96,...,0.875,18,34,52,21,2,9,11,13,-19.0
5,22023,1610612738,BOS,Boston Celtics,22300630,2024-01-25,BOS @ MIA,W,239,143,...,0.95,6,41,47,36,4,5,11,10,33.0
6,22023,1610612738,BOS,Boston Celtics,22300611,2024-01-22,BOS @ DAL,W,240,119,...,0.733,5,39,44,26,7,8,6,20,9.0
7,22023,1610612738,BOS,Boston Celtics,22300603,2024-01-21,BOS @ HOU,W,240,116,...,0.722,16,39,55,31,10,12,16,17,9.0
8,22023,1610612738,BOS,Boston Celtics,22300586,2024-01-19,BOS vs. DEN,L,240,100,...,0.714,12,26,38,21,5,2,2,17,-2.0
9,22023,1610612738,BOS,Boston Celtics,22300571,2024-01-17,BOS vs. SAS,W,240,117,...,0.789,12,42,54,22,5,6,12,15,19.0


In [55]:
current_season.isnull().values.any()

False

# Create index

In [56]:
INDEX_NAME = "boston_celtics_current_season"

es.indices.create(index = INDEX_NAME)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'boston_celtics_current_season'})

In [57]:
def doc_generator(df, timeframe):
    for index, document in df.iterrows():
        yield {
            "_index": INDEX_NAME, 
            "_id": f"{document['GAME_ID']}",
            "_source": document.to_dict(),
        }

In [58]:
helpers.bulk(es, doc_generator(current_season, index))
es.indices.refresh(index=index)

ObjectApiResponse({'_shards': {'total': 2, 'successful': 1, 'failed': 0}})

# Search 

In [59]:
search_query = {
    "query": {
        "match": {
            "WL": "W"
        }
    }
}

games_won = es.count(index="boston_celtics_current_season", body=search_query)

In [60]:
print(f"The Celtics won {games_won['count']} games this season so far.")

The Celtics won 38 games this season so far.


In [62]:
streak_query = {
  "size": 1000,  
  "sort": [
    {
      "GAME_DATE": {
        "order": "asc"
      }
    }
  ],
  "_source": ["GAME_DATE", "WL"]
}

In [63]:
streak_search = es.search(
    index=INDEX_NAME,
    body=streak_query)

In [64]:
gs = [hit['_source'] for hit in streak_search['hits']['hits']]

In [65]:
streaks = []
current_streak = 1
for i in range(1, len(gs)):
    if gs[i]['WL'] == gs[i-1]['WL']:
        current_streak += 1
    else:
        streaks.append((gs[i-1]['WL'], current_streak))
        current_streak = 1


streaks.append((gs[-1]['WL'], current_streak))
top_streaks = sorted(streaks, key=lambda x: x[1], reverse=True)[:5]
print(top_streaks)

[('W', 6), ('W', 6), ('W', 5), ('W', 5), ('W', 3)]
