-
Notifications
You must be signed in to change notification settings - Fork 4
/
duolingo.py
448 lines (365 loc) · 15.4 KB
/
duolingo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
"""
Custom API Client for Duolingo. This is necessary to access your own statistics in your Duolingo account.
Essentially, there are three endpoints that will be used during the lifecycle of this API helper, which are:
- `https://www.duolingo.com/login` -- to log in to the API.
- `https://www.duolingo.com/users/<USERNAME>` -- to access the currently logged in user's data and streak information.
- `https://www.duolingo.com/2017-06-30/users/<UID>/xp_summaries?startDate=1970-01-01` -- to access the currently logged in user's experience gain information.
Please use this code responsibly and do not spam Duolingo's servers by using it like a bot.
You'll get rate-limited and make their software engineers' jobs harder, which is not a good thing.
"""
from dataclasses import dataclass
from datetime import datetime
from json import loads, dumps
from typing import Any, Literal, NoReturn, Optional, Union
from pydantic import BaseModel, ConfigDict, Field, ValidationError
import requests
class Summary(BaseModel):
    """
    One element of the `summaries` list in Duolingo's `xp_summaries` response.

    Every attribute is read from the camelCase key named in its alias. Because
    `populate_by_name` is switched on, the snake_case attribute names are accepted
    as input keys as well.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Integer timestamp of the entry; this module feeds it to `datetime.fromtimestamp`.
    date: int = Field(alias="date")
    # Value of the `dailyGoalXp` key.
    daily_goal_xp: int = Field(alias="dailyGoalXp")
    # Value of the `gainedXp` key.
    gained_xp: int = Field(alias="gainedXp")
    # Value of the `numSessions` key.
    num_sessions: int = Field(alias="numSessions")
    # Value of the `totalSessionTime` key.
    total_session_time: int = Field(alias="totalSessionTime")
class UserDataResponse(BaseModel):
    """
    The part of Duolingo's user-data response that this module consumes.

    Only the streak counter is declared here.
    """

    # Value of the `site_streak` key.
    site_streak: int
@dataclass
class Duolingo:
    """
    REST API client for Duolingo. Please use responsibly and do not spam their servers.

    When initializing this class, please use keyword arguments rather than positional
    ones, so that the initialization stays explicit. Note that `daily_experience_progress`
    and `user_data` are dataclass fields without defaults, so they must be supplied at
    construction time (for example as empty dictionaries); `fetch_data` overwrites both.
    """

    ##
    # Special exceptions relevant to this class to be exported and used by an external party.
    # This is important, as we want to define our own exceptions instead of using the
    # already made ones.
    ##

    class BreakingAPIChange(Exception):
        """
        Raised when an API response no longer matches the schema this client expects.
        """

    class CaptchaException(Exception):
        """
        Raised when the API answers `403` with a `blockScript` entry, i.e. it asks for a
        captcha. If this happens, you are probably caught in their spam filter and have to
        change your user agent. You also have to log in again.
        """

    class LoginException(Exception):
        """
        Raised when logging in to the API failed. This means that your credentials are
        either wrong or missing, or the login endpoint returned an unexpected response.
        """

    class NotFoundException(Exception):
        """
        Raised when the API returns a `404 Not Found`.
        """

    class UnauthorizedException(Exception):
        """
        Raised when the API returns a `401 Unauthorized`.
        """

    ##
    # Constants, unchanging state of this class.
    ##

    BASE_URL = "https://www.duolingo.com"
    """Base URL of Duolingo's API."""

    ##
    # Class members to be initialized in the `__init__` method. Remember, this is a `@dataclass`.
    # For usage, it is recommended that you pass them as keyword arguments.
    ##

    username: str
    """Your Duolingo username."""

    password: Optional[str]
    """Your Duolingo password. Not needed when a JSON Web Token is supplied."""

    jwt: Optional[str]
    """Your Duolingo JSON Web Token. The main token used to authenticate requests to the API."""

    # Not annotated, so `@dataclass` does not turn it into a field: this stays a plain
    # class attribute. It is kept so that `Duolingo.session` still resolves, but every
    # instance replaces it with a session of its own in `__post_init__`.
    session = requests.Session()
    """HTTP session used for all requests made by an instance."""

    daily_experience_progress: dict[str, Any]
    """Raw `xp_summaries` response, as stored by `fetch_data`."""

    user_data: dict[str, Any]
    """Raw user-data response, as stored by `fetch_data`."""

    login_method: Literal["JWT", "Password"] = "Password"
    """How this client authenticated. Defaults to `Password`; `login` sets it to `JWT` when a token is present."""

    user_agent: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    """User agent sent with every request to the API."""

    ##
    # Methods of this class.
    ##

    def __post_init__(self) -> None:
        """
        Give every instance its own `requests.Session`.

        Without this, all instances would share the single class-level session, and
        therefore its cookie jar, even when they belong to different accounts.
        """
        self.session = requests.Session()

    @staticmethod
    def _is_captcha_block(response: requests.Response) -> bool:
        """
        Tell whether `response` is a `403` whose JSON object carries a `blockScript` entry.

        A `403` whose body is not JSON, or is JSON but not an object, is not treated as a
        captcha block instead of crashing while parsing it.
        """
        if response.status_code != 403:
            return False

        try:
            payload = response.json()
        except ValueError:
            # The JSON decode errors raised by `Response.json()` derive from `ValueError`.
            return False

        return isinstance(payload, dict) and payload.get("blockScript") is not None

    def request(
        self,
        url: str,
        data: Optional[dict[str, Any]] = None,
        timeout: Optional[float] = None,
    ) -> requests.Response:
        """
        Perform a single API call against Duolingo.

        Args:
            url: Absolute URL to call.
            data: JSON body. A non-empty dictionary turns the call into a `POST`;
                `None` or an empty dictionary results in a `GET`.
            timeout: Seconds handed to `Session.send`. The default `None` applies no
                timeout, which is what this method did before the parameter existed.

        Returns:
            The response object. Status codes other than the ones listed below are
            returned to the caller untouched.

        Raises:
            UnauthorizedException: The API answered `401`.
            NotFoundException: The API answered `404`.
            CaptchaException: The API answered `403` with a `blockScript` entry.
        """
        # Network headers required to perform authenticated requests.
        headers = {
            "Authorization": f"Bearer {self.jwt}" if self.jwt is not None else "",
            "User-Agent": self.user_agent,
        }

        # Build the request by hand so the session's cookies are attached explicitly.
        prepared = requests.Request(
            method="POST" if data else "GET",
            url=url,
            json=data,
            headers=headers,
            cookies=self.session.cookies,
        ).prepare()

        response = self.session.send(prepared, timeout=timeout)

        # Handle several errors: `401` and `404`.
        if response.status_code == 401:
            raise self.UnauthorizedException(
                f"You are not authorized to access the resource with URL: '{url}'. Please try again with the correct credentials."
            )
        if response.status_code == 404:
            raise self.NotFoundException(
                "The resource that you are looking for is not found."
            )

        # Handle an edge case: captcha lock-out!
        if self._is_captcha_block(response):
            raise self.CaptchaException(
                f"Request to '{url}' with user agent '{self.user_agent}' was blocked, and the API requests you to solve a captcha. Please try logging in again with a different user agent."
            )

        return response

    def login(self) -> str:
        """
        Make sure this client holds a JWT and return it.

        - If a JWT is already set, no request is made; `login_method` is set to `JWT`.
        - Otherwise `username` and `password` are posted to the login endpoint and the
          token is read from the `jwt` response header and stored on the instance.

        This method does not fetch any user data; call `fetch_data` afterwards. Please
        store the returned JWT so that later runs can skip the password login.

        Raises:
            LoginException: No password is available, the API reported a failure, or the
                login response was not the expected JSON-plus-`jwt`-header shape.
        """
        if self.jwt:
            # If we log in with JWT, we have to make sure that we set this flag.
            self.login_method = "JWT"
            return self.jwt

        if self.password is None:
            # Do not bother the server with a request that cannot succeed.
            raise self.LoginException(
                "Cannot log in without a JWT or a password. Please provide one of them."
            )

        response = self.request(
            f"{self.BASE_URL}/login",
            data={
                "login": self.username,
                "password": self.password,
            },
        )

        failure_message = "Failed to log in with your current credentials. Please check it and try again later."
        try:
            failed = "failure" in response.json()
        except (ValueError, TypeError) as err:
            # Body was not JSON, or was JSON that cannot be searched (e.g. `null`).
            raise self.LoginException(failure_message) from err
        if failed:
            raise self.LoginException(failure_message)

        # Inject our JWT for subsequent requests in the same session.
        try:
            self.jwt = response.headers["jwt"]
        except KeyError as err:
            raise self.LoginException(failure_message) from err

        return self.jwt

    def fetch_data(self) -> tuple[dict[str, Any], dict[str, Any]]:
        """
        Fetch the user's data from Duolingo's API and store it on the instance.

        This should be called right after one has logged in. Two API calls are made:
        the user data for `username` first, then the XP summaries for the `id` found
        in that user data.

        Returns:
            The tuple `(user_data, daily_experience_progress)`.

        Raises:
            BreakingAPIChange: The user-data response does not expose an `id`.
        """
        self.user_data = self.request(f"{self.BASE_URL}/users/{self.username}").json()

        try:
            user_id = self.user_data["id"]
        except (KeyError, TypeError) as err:
            raise self.BreakingAPIChange(
                "API response does not conform to the schema. Perhaps the response from the server may have been changed."
            ) from err

        self.daily_experience_progress = self.request(
            f"{self.BASE_URL}/2017-06-30/users/{user_id}/xp_summaries?startDate=1970-01-01"
        ).json()

        return self.user_data, self.daily_experience_progress

    def get_summaries(self) -> list[Summary]:
        """
        Validate and return every entry of the stored `summaries` list.

        The data is read from `daily_experience_progress`, so `fetch_data` has to run
        first. Each entry carries the daily goal XP, the gained XP, the number of
        sessions and the total session time of one day.

        Expected JSON data:

        ```json
        {
            "summaries": [
                {
                    "date": 1659657600,
                    "numSessions": 1,
                    "gainedXp": 100,
                    "frozen": false,
                    "repaired": false,
                    "streakExtended": true,
                    "userId": 1,
                    "dailyGoalXp": 50,
                    "totalSessionTime": 1
                },
                {
                    "date": 1659571200,
                    "numSessions": 1,
                    "gainedXp": 200,
                    "frozen": false,
                    "repaired": false,
                    "streakExtended": true,
                    "userId": 1,
                    "dailyGoalXp": 50,
                    "totalSessionTime": 1
                }
            ]
        }
        ```

        As a note, `summaries` at position `0` will always show the latest time.

        Raises:
            BreakingAPIChange: The `summaries` key is missing or an entry fails validation.
        """
        try:
            return [
                Summary.model_validate(entry)
                for entry in self.daily_experience_progress["summaries"]
            ]
        except (KeyError, TypeError, ValidationError) as err:
            raise self.BreakingAPIChange(
                "API response does not conform to the schema. Perhaps the response from the server may have been changed."
            ) from err

    def get_user_data(self) -> UserDataResponse:
        """
        Validate the stored `user_data` and return the streak information in it.

        The data is read from the `user_data` attribute, so `fetch_data` has to run first.

        Expected JSON data (not real data):

        ```json
        {
            "site_streak": 10
        }
        ```

        Raises:
            BreakingAPIChange: `user_data` fails validation.
        """
        try:
            return UserDataResponse.model_validate(self.user_data)
        except ValidationError as err:
            raise self.BreakingAPIChange(
                "API response does not conform to the schema. Perhaps the response from the server may have been changed."
            ) from err
class Experience(BaseModel):
    """
    Experience points of a single day: the goal and what was actually gained.
    """

    # Filled from `Summary.daily_goal_xp` by `summary_to_progression`.
    xp_goal: int
    # Filled from `Summary.gained_xp` by `summary_to_progression`.
    xp_today: int
class SessionInformation(BaseModel):
    """
    Session figures of a single day.
    """

    # Filled from `Summary.num_sessions` by `summary_to_progression`.
    number_of_sessions: int
    # Filled from `Summary.total_session_time` by `summary_to_progression`.
    session_time: int
class StreakInformation(BaseModel):
    """
    Streak counter that is stored alongside a day's progression.
    """

    # Streak count, e.g. copied from `UserDataResponse.site_streak`.
    site_streak: int
class Progression(BaseModel):
    """
    A day's experience and session information, bundled together.
    """

    # XP goal and XP gained.
    experience: Experience
    # Number of sessions and session time.
    session_information: SessionInformation
class DatabaseEntry(BaseModel):
    """
    A single object in the list of database entries that is uploaded to the repository.
    This is the authentic Duolingo data.
    """

    # Day of the entry; this module writes it as `%Y/%m/%d`.
    date: str
    # Experience and session figures of that day.
    progression: Progression
    # Streak counter recorded for that day.
    streak_information: StreakInformation
    # Time of recording; `progression_to_database_entry` writes it as `%H:%M:%S`.
    time: str
class TimeAndStreakMapping(BaseModel):
    """
    Time and streak taken from an existing database entry.

    `sync_database_with_summaries` keeps one of these per entry date so both values
    survive when the progression of that date is rebuilt from a summary.
    """

    # `DatabaseEntry.time` of the existing entry.
    time: str
    # `StreakInformation.site_streak` of the existing entry.
    streak: int
def summary_to_progression(summary: Summary) -> Progression:
    """
    Convert an API summary entry into the `Progression` shape stored in the database.
    """
    xp = Experience(
        xp_goal=summary.daily_goal_xp,
        xp_today=summary.gained_xp,
    )
    sessions = SessionInformation(
        number_of_sessions=summary.num_sessions,
        session_time=summary.total_session_time,
    )
    return Progression(experience=xp, session_information=sessions)
def user_data_to_streak_information(user_data: UserDataResponse) -> StreakInformation:
    """
    Copy the streak counter of a validated user-data response into a `StreakInformation`.
    """
    current_streak = user_data.site_streak
    return StreakInformation(site_streak=current_streak)
def sync_database_with_summary(
    summary: Summary, meta: dict[str, TimeAndStreakMapping]
) -> Optional[DatabaseEntry]:
    """
    Rebuild the database entry of one day from its API summary.

    The progression is taken from `summary`; the time and the streak are taken from the
    existing entry of the same date, looked up in `meta`.

    Args:
        summary: API summary entry of the day.
        meta: Time and streak of the existing database entries, keyed by their
            `%Y/%m/%d` date.

    Returns:
        The rebuilt entry, or `None` when `meta` holds no entry for the summary's date.
        `sync_database_with_summaries` drops such `None` values.
    """
    # `fromtimestamp` without a timezone converts using the machine's local timezone.
    # NOTE(review): the sample timestamps in `Duolingo.get_summaries` are UTC midnights,
    # so on a machine west of UTC this may resolve to the previous day -- confirm.
    summary_date = datetime.fromtimestamp(summary.date).strftime("%Y/%m/%d")

    # A summary for a day that was never recorded in the database has no time or streak
    # to carry over. Signal that with `None` instead of failing with a `KeyError`.
    time_and_streak = meta.get(summary_date)
    if time_and_streak is None:
        return None

    return DatabaseEntry(
        date=summary_date,
        progression=summary_to_progression(summary),
        streak_information=StreakInformation(site_streak=time_and_streak.streak),
        time=time_and_streak.time,
    )
def _entry_fingerprint(entry: DatabaseEntry) -> tuple[str, str, str]:
    """Return a hashable (date, progression, streak) key identifying an entry's content."""
    return (
        entry.date,
        dumps(entry.progression.model_dump(), sort_keys=True),
        dumps(entry.streak_information.model_dump(), sort_keys=True),
    )


def sync_database_with_summaries(
    summaries: list[Summary], database: list[DatabaseEntry]
) -> tuple[list[DatabaseEntry], bool]:
    """
    Rebuild the database from the API summaries and report whether anything changed.

    For every summary, the existing entry's date, time and streak are kept while its
    progression is replaced by the one derived from the summary.

    Args:
        summaries: API summaries, in the reverse-chronological order of the API.
        database: Existing database entries.

    Returns:
        A tuple `(new_database, changed)`. `new_database` is in chronological order.
        `changed` is `True` when at least one existing entry has no counterpart in
        `new_database` with the same date, progression and streak. The `time` field
        is not part of the comparison.
    """
    # Extract all database date and time. Make this into a key value pair so we
    # can easily sync it with the existing database. Ideally we would like to keep the
    # date and time, but modify the progression.
    time_and_streak_record = {
        entry.date: TimeAndStreakMapping(
            time=entry.time, streak=entry.streak_information.site_streak
        )
        for entry in database
    }

    # Technically it's sorted in reverse-chronological order, but we want to do it in
    # chronological order so we can keep the streak in sync.
    synced_entries = (
        sync_database_with_summary(summary, time_and_streak_record)
        for summary in reversed(summaries)
    )

    # Filter out `None` values that can possibly exist because of unsynced date and time in summaries.
    filtered_database = [entry for entry in synced_entries if entry is not None]

    # Compare entry by entry, with the date as part of the key. Comparing only the sets
    # of progression values would miss a day whose corrected values happen to equal the
    # values some other day already had.
    synced_fingerprints = {_entry_fingerprint(entry) for entry in filtered_database}
    changed = any(
        _entry_fingerprint(entry) not in synced_fingerprints for entry in database
    )

    # Return the processed data, and return a flag to know whether there's any out of sync data.
    return filtered_database, changed
def progression_to_database_entry(
    progression: Progression, streak_information: StreakInformation
) -> DatabaseEntry:
    """
    Wrap a progression and a streak into a database entry stamped with the current
    local date (`%Y/%m/%d`) and time (`%H:%M:%S`).
    """
    # Read the clock once: two separate `now()` calls could straddle midnight and pair
    # the old day's date with the new day's time.
    now = datetime.now()

    return DatabaseEntry(
        date=now.strftime("%Y/%m/%d"),
        progression=progression,
        streak_information=streak_information,
        time=now.strftime("%H:%M:%S"),
    )