Skip to content

Commit

Permalink
feat(web-scraping): allow users to define custom cron schedules for t…
Browse files Browse the repository at this point in the history
…he tracker jobs
  • Loading branch information
azasypkin committed May 31, 2024
1 parent 8b98cf5 commit acbd82a
Show file tree
Hide file tree
Showing 13 changed files with 285 additions and 18 deletions.
23 changes: 23 additions & 0 deletions dev/api/scheduler/parse_schedule.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
### Parse cron-like schedule (interval is too short).
# @no-cookie-jar
POST {{host}}/api/scheduler/parse_schedule
Accept: application/json
Content-Type: application/json
Cookie: {{cookie-credentials}}

{
"schedule": "* * * * * *"
}

### Parse cron-like schedule (every minute of hours 9, 12 and 15 on the 1st and 15th of each month).
### // sec min hour day of month month day of week year
### let expression = "0 30 9,12,15 1,15 May-Aug Mon,Wed,Fri 2018/2";
# @no-cookie-jar
POST {{host}}/api/scheduler/parse_schedule
Accept: application/json
Content-Type: application/json
Cookie: {{cookie-credentials}}

{
"schedule": "0 * 9,12,15 1,15 * *"
}
4 changes: 4 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ mod tests {
trackers: 100,
tracker_revisions: 30,
tracker_schedules: None,
min_schedule_interval: 10s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 100,
Expand All @@ -344,6 +345,7 @@ mod tests {
trackers: 100,
tracker_revisions: 30,
tracker_schedules: None,
min_schedule_interval: 10s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 100,
Expand All @@ -366,6 +368,7 @@ mod tests {
trackers: 100,
tracker_revisions: 30,
tracker_schedules: None,
min_schedule_interval: 10s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 100,
Expand All @@ -388,6 +391,7 @@ mod tests {
trackers: 100,
tracker_revisions: 30,
tracker_schedules: None,
min_schedule_interval: 10s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 100,
Expand Down
12 changes: 12 additions & 0 deletions src/config/raw_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ mod tests {
[subscriptions.basic.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[subscriptions.basic.certificates]
private_keys = 100
Expand All @@ -131,6 +132,7 @@ mod tests {
[subscriptions.standard.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[subscriptions.standard.certificates]
private_keys = 100
Expand All @@ -148,6 +150,7 @@ mod tests {
[subscriptions.professional.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[subscriptions.professional.certificates]
private_keys = 100
Expand All @@ -165,6 +168,7 @@ mod tests {
[subscriptions.ultimate.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[subscriptions.ultimate.certificates]
private_keys = 100
Expand Down Expand Up @@ -223,6 +227,7 @@ mod tests {
[subscriptions.basic.web_scraping]
trackers = 1
tracker_revisions = 11
min_schedule_interval = 10_000
[subscriptions.basic.web_security]
policies = 10
Expand All @@ -242,6 +247,7 @@ mod tests {
[subscriptions.standard.web_scraping]
trackers = 2
tracker_revisions = 22
min_schedule_interval = 20_000
[subscriptions.standard.web_security]
policies = 1000
Expand All @@ -261,6 +267,7 @@ mod tests {
[subscriptions.professional.web_scraping]
trackers = 3
tracker_revisions = 33
min_schedule_interval = 30_000
[subscriptions.professional.web_security]
policies = 1000
Expand All @@ -280,6 +287,7 @@ mod tests {
[subscriptions.ultimate.web_scraping]
trackers = 4
tracker_revisions = 44
min_schedule_interval = 40_000
[subscriptions.ultimate.web_security]
policies = 1000
Expand Down Expand Up @@ -516,6 +524,7 @@ mod tests {
trackers: 1,
tracker_revisions: 11,
tracker_schedules: None,
min_schedule_interval: 10s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 1,
Expand All @@ -542,6 +551,7 @@ mod tests {
trackers: 2,
tracker_revisions: 22,
tracker_schedules: None,
min_schedule_interval: 20s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 2,
Expand All @@ -568,6 +578,7 @@ mod tests {
trackers: 3,
tracker_revisions: 33,
tracker_schedules: None,
min_schedule_interval: 30s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 3,
Expand All @@ -594,6 +605,7 @@ mod tests {
trackers: 4,
tracker_revisions: 44,
tracker_schedules: None,
min_schedule_interval: 40s,
},
certificates: SubscriptionCertificatesConfig {
private_keys: 4,
Expand Down
24 changes: 22 additions & 2 deletions src/config/subscriptions_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ mod tests {
[basic.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[basic.certificates]
private_keys = 100
Expand All @@ -88,6 +89,7 @@ mod tests {
[standard.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[standard.certificates]
private_keys = 100
Expand All @@ -105,6 +107,7 @@ mod tests {
[professional.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[professional.certificates]
private_keys = 100
Expand All @@ -122,6 +125,7 @@ mod tests {
[ultimate.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[ultimate.certificates]
private_keys = 100
Expand Down Expand Up @@ -152,6 +156,7 @@ mod tests {
[basic.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[basic.certificates]
private_keys = 100
Expand All @@ -169,6 +174,7 @@ mod tests {
[standard.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[standard.certificates]
private_keys = 100
Expand All @@ -186,6 +192,7 @@ mod tests {
[professional.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[professional.certificates]
private_keys = 100
Expand All @@ -203,6 +210,7 @@ mod tests {
[ultimate.web_scraping]
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
[ultimate.certificates]
private_keys = 100
Expand Down Expand Up @@ -231,6 +239,7 @@ mod tests {
trackers = 1
tracker_revisions = 11
tracker_schedules = ["@", "@daily", "@weekly", "@monthly"]
min_schedule_interval = 10_000
[basic.certificates]
private_keys = 1
Expand All @@ -251,6 +260,7 @@ mod tests {
trackers = 2
tracker_revisions = 22
tracker_schedules = ["@", "@hourly", "@daily", "@weekly", "@monthly"]
min_schedule_interval = 20_000
[standard.web_security]
policies = 1000
Expand All @@ -270,6 +280,7 @@ mod tests {
[professional.web_scraping]
trackers = 3
tracker_revisions = 33
min_schedule_interval = 30_000
[professional.web_security]
policies = 1000
Expand All @@ -288,6 +299,7 @@ mod tests {
[ultimate.web_scraping]
trackers = 4
tracker_revisions = 44
min_schedule_interval = 40_000
[ultimate.web_security]
policies = 1000
Expand Down Expand Up @@ -323,7 +335,8 @@ mod tests {
]
.into_iter()
.collect()
)
),
min_schedule_interval: Duration::from_secs(10),
},
web_security: SubscriptionWebSecurityConfig {
policies: 10,
Expand Down Expand Up @@ -362,7 +375,8 @@ mod tests {
]
.into_iter()
.collect()
)
),
min_schedule_interval: Duration::from_secs(20),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand All @@ -389,6 +403,7 @@ mod tests {
trackers: 3,
tracker_revisions: 33,
tracker_schedules: None,
min_schedule_interval: Duration::from_secs(30),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand All @@ -408,6 +423,7 @@ mod tests {
trackers: 4,
tracker_revisions: 44,
tracker_schedules: None,
min_schedule_interval: Duration::from_secs(40),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand Down Expand Up @@ -445,6 +461,7 @@ mod tests {
.into_iter()
.collect(),
),
min_schedule_interval: Duration::from_secs(10),
},
web_security: SubscriptionWebSecurityConfig {
policies: 10,
Expand Down Expand Up @@ -484,6 +501,7 @@ mod tests {
.into_iter()
.collect(),
),
min_schedule_interval: Duration::from_secs(20),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand All @@ -510,6 +528,7 @@ mod tests {
trackers: 3,
tracker_revisions: 33,
tracker_schedules: None,
min_schedule_interval: Duration::from_secs(30),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand All @@ -529,6 +548,7 @@ mod tests {
trackers: 4,
tracker_revisions: 44,
tracker_schedules: None,
min_schedule_interval: Duration::from_secs(40),
},
web_security: SubscriptionWebSecurityConfig::default(),
certificates: SubscriptionCertificatesConfig {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use serde_derive::{Deserialize, Serialize};
use std::collections::HashSet;
use serde_with::{serde_as, DurationMilliSeconds};
use std::{collections::HashSet, time::Duration};

#[serde_as]
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
pub struct SubscriptionWebScrapingConfig {
/// The number of trackers (content, resources etc.) available to a particular subscription.
Expand All @@ -9,6 +11,9 @@ pub struct SubscriptionWebScrapingConfig {
pub tracker_revisions: usize,
/// The list of allowed schedules for the trackers for a particular subscription.
pub tracker_schedules: Option<HashSet<String>>,
/// The minimum interval between two consecutive scheduled tracker checks.
#[serde_as(as = "DurationMilliSeconds<u64>")]
pub min_schedule_interval: Duration,
}

impl Default for SubscriptionWebScrapingConfig {
Expand All @@ -18,6 +23,8 @@ impl Default for SubscriptionWebScrapingConfig {
tracker_revisions: 30,
// Default to None to allow all schedules.
tracker_schedules: None,
// Default to 10 seconds.
min_schedule_interval: Duration::from_secs(10),
}
}
}
Expand All @@ -33,6 +40,7 @@ mod tests {
assert_toml_snapshot!(config, @r###"
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10000
"###);
}

Expand All @@ -42,6 +50,7 @@ mod tests {
r#"
trackers = 100
tracker_revisions = 30
min_schedule_interval = 10_000
"#,
)
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/scheduler/schedule_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub trait ScheduleExt {
}

impl ScheduleExt for Schedule {
/// Returns the minimum interval between occurrences. To calculate it, we take the first 10
/// Returns the minimum interval between occurrences. To calculate it, we take the first 100
/// upcoming occurrences and calculate the interval between each of them. Then we take the
/// smallest interval.
fn min_interval(&self) -> anyhow::Result<Duration> {
Expand Down
4 changes: 4 additions & 0 deletions src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ pub async fn run(raw_config: RawConfig) -> Result<(), anyhow::Error> {
.route("/self", web::get().to(handlers::security_users_get_self))
.route("/{user_id}", web::get().to(handlers::security_users_get)),
)
.service(web::scope("/scheduler").route(
"/parse_schedule",
web::post().to(handlers::scheduler_parse_schedule),
))
.service(
web::scope("/utils")
.service(
Expand Down
Loading

0 comments on commit acbd82a

Please sign in to comment.