Skip to content

Commit

Permalink
YouTube improvements. Add a hidden way to use the RSS Box endpoint (h…
Browse files Browse the repository at this point in the history
…old the shift key when submitting the form). Add YouTube feed options to the modal. Fix the shorts filter by querying youtube.com/shorts/ for each video (it will make the query more expensive, consider using `min_length` instead).
  • Loading branch information
stefansundin committed May 6, 2024
1 parent a079717 commit 55b5fa8
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 29 deletions.
63 changes: 44 additions & 19 deletions app.rb
Expand Up @@ -282,15 +282,14 @@
# https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g/videos
# https://www.youtube.com/channel/SWu5RTwuNMv6U
# https://www.youtube.com/channel/UCd6MoB9NC6uYN2grvUNT-Zg/search?query=aurora
elsif /youtube\.com\/(?<type>user|c|show)\/(?<slug>[^\/?#]+)(?:\/search\?query=(?<query>[^&#]+))?/ =~ params[:q]
elsif /youtube\.com\/user\/(?<user>[^\/?#]+)(?:\/search\?query=(?<query>[^&#]+))?/ =~ params[:q]
# https://www.youtube.com/user/khanacademy/videos
# https://www.youtube.com/c/khanacademy
# https://www.youtube.com/show/redvsblue
# https://www.youtube.com/user/AmazonWebServices/search?query=aurora
elsif /youtube\.com\/(?<path>c\/[^\/?#]+)(?:\/search\?query=(?<query>[^&#]+))?/ =~ params[:q]
# https://www.youtube.com/c/khanacademy
# https://www.youtube.com/c/khanacademy/search?query=Frequency+stability
# there is no way to resolve these accurately through the API, the best way is to look for the channelId meta tag in the website HTML
# note that slug != username, e.g. https://www.youtube.com/c/kawaiiguy and https://www.youtube.com/user/kawaiiguy are two different channels
user = "#{type}/#{slug}"
elsif /(?:youtu\.be|youtube\.com\/(?:embed|v|shorts))\/(?<video_id>[^?#]+)/ =~ params[:q]
# https://youtu.be/vVXbgbMp0oY?t=1s
# https://www.youtube.com/embed/vVXbgbMp0oY
Expand All @@ -307,14 +306,17 @@
# https://www.youtube.com/watch?v=vVXbgbMp0oY&t=5s
elsif /youtube\.com\/.*[?&]list=(?<playlist_id>[^&#]+)/ =~ params[:q]
# https://www.youtube.com/playlist?list=PL0QrZvg7QIgpoLdNFnEePRrU-YJfr9Be7
elsif /youtube\.com\/(?<user>[^\/?#]+)/ =~ params[:q]
elsif /youtube\.com\/(?<handle>[^\/?#]+)/ =~ params[:q]
# https://www.youtube.com/khanacademy
# https://www.youtube.com/@awscommunity
elsif /\b(?<channel_id>(?:UC[^\/?#]{22,}|S[^\/?#]{12,}))/ =~ params[:q]
# it's a channel id
elsif params[:q].start_with?("@")
# it's a handle
handle = params[:q]
else
# it's probably a channel name
user = params[:q]
# maybe it is a handle?
handle = "@#{params[:q]}"
end

if playlist_id
Expand All @@ -324,9 +326,28 @@

if user
channel_id, _ = App::Cache.cache("youtube.user", user.downcase, 60*60, 60) do
response = App::HTTP.get("https://www.youtube.com/#{user}")
response = App::YouTube.get("/channels", query: { forUsername: user })
raise(App::GoogleError, response) if !response.success?
if response.json["items"] && response.json["items"].length > 0
response.json["items"][0]["id"]
else
"Error: Could not find the user. Please try with a video url instead."
end
end
elsif handle
channel_id, _ = App::Cache.cache("youtube.handle", handle.downcase, 60*60, 60) do
response = App::YouTube.get("/channels", query: { forHandle: handle })
raise(App::GoogleError, response) if !response.success?
if response.json["items"] && response.json["items"].length > 0
response.json["items"][0]["id"]
else
"Error: Could not find the user. Please try with a video url instead."
end
end
elsif path
channel_id, _ = App::Cache.cache("youtube.path", path.downcase, 60*60, 60) do
response = App::HTTP.get("https://www.youtube.com/#{path}")
if response.redirect?
# https://www.youtube.com/tyt -> https://www.youtube.com/user/theyoungturks (different from https://www.youtube.com/user/tyt)
response = App::HTTP.get(response.redirect_url)
end
next "Error: Could not find the user. Please try with a video url instead." if response.code == 404
Expand All @@ -336,7 +357,7 @@
end
elsif video_id
channel_id, _ = App::Cache.cache("youtube.video", video_id, 60*60, 60) do
response = App::Google.get("/youtube/v3/videos", query: { part: "snippet", id: video_id })
response = App::YouTube.get("/videos", query: { part: "snippet", id: video_id })
raise(App::GoogleError, response) if !response.success?
if response.json["items"].length > 0
response.json["items"][0]["snippet"]["channelId"]
Expand All @@ -354,7 +375,7 @@
return [422, "Something went wrong. Try again later."] if channel_id.nil?
return [422, channel_id] if channel_id.start_with?("Error:")

if query || params[:type]
if query || params.has_key?(:shift)
username, _ = App::Cache.cache("youtube.channel", channel_id, 60*60, 60) do
# it is no longer possible to get usernames using the API
# note that the values include " - YouTube" at the end if the User-Agent is a browser
Expand All @@ -370,7 +391,11 @@
query = CGI.unescape(query) # youtube uses + here instead of %20
redirect Addressable::URI.new(path: "/youtube/#{channel_id}/#{username}", query_values: { q: query }.merge(params.slice(:tz))).normalize.to_s, 301
elsif channel_id
redirect "https://www.youtube.com/feeds/videos.xml" + Addressable::URI.new(query: "channel_id=#{channel_id}").normalize.to_s, 301
if params.has_key?(:shift)
redirect Addressable::URI.new(path: "/youtube/#{channel_id}/#{username}", query_values: params.slice(:tz)).normalize.to_s, 301
else
redirect "https://www.youtube.com/feeds/videos.xml" + Addressable::URI.new(query: "channel_id=#{channel_id}").normalize.to_s, 301
end
else
return [404, "Could not find the channel."]
end
Expand All @@ -386,12 +411,12 @@
data, _ = App::Cache.cache("youtube.ics", channel_id, 60*60, 60) do
# The API is really inconsistent in listing scheduled live streams, but the RSS endpoint seems to consistently list them, so experiment with using that
response = App::HTTP.get("https://www.youtube.com/feeds/videos.xml?channel_id=#{channel_id}")
next "Error: It seems like this channel no longer exists." if response.code == 404
next "Error: This channel no longer exists or has no videos." if response.code == 404
raise(App::GoogleError, response) if !response.success?
doc = Nokogiri::XML(response.body)
ids = doc.xpath("//yt:videoId").map(&:text)

response = App::Google.get("/youtube/v3/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") })
response = App::YouTube.get("/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") })
raise(App::GoogleError, response) if !response.success?

items = response.json["items"].sort_by! do |video|
Expand Down Expand Up @@ -455,12 +480,12 @@

data, @updated_at = App::Cache.cache("youtube.videos", channel_id, 60*60, 60) do
# The results from this query are not sorted by publishedAt for whatever reason.. probably due to some uploads being scheduled to be published at a certain time
response = App::Google.get("/youtube/v3/playlistItems", query: { part: "snippet", playlistId: playlist_id, maxResults: 10 })
next "Error: It seems like this channel no longer exists." if response.code == 404
response = App::YouTube.get("/playlistItems", query: { part: "snippet", playlistId: playlist_id, maxResults: 10 })
next "Error: This channel no longer exists or has no videos." if response.code == 404
raise(App::GoogleError, response) if !response.success?
ids = response.json["items"].sort_by { |v| Time.parse(v["snippet"]["publishedAt"]) }.reverse.map { |v| v["snippet"]["resourceId"]["videoId"] }

response = App::Google.get("/youtube/v3/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") })
response = App::YouTube.get("/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") })
raise(App::GoogleError, response) if !response.success?

response.json["items"].map do |video|
Expand Down Expand Up @@ -507,7 +532,7 @@

if params.has_key?(:shorts)
remove_shorts = (params[:shorts] == "0")
@data.select! { |v| v["title"].downcase.include?("#shorts") != remove_shorts }
@data.select! { |v| App::YouTube.is_short?(v["id"]) != remove_shorts }
end

if params.has_key?(:min_length) && min_length = params[:min_length].parse_duration
Expand Down Expand Up @@ -1495,7 +1520,7 @@
data, @updated_at = App::Cache.cache("imgur.user", @username.downcase, 60*60, 60) do
# can't use user_id in this request unfortunately
response = App::Imgur.get("/account/#{@username}/submissions")
next "Error: It seems like this user no longer exists." if response.code == 404
next "Error: This user no longer exists." if response.code == 404
raise(App::ImgurError, response) if !response.success? || response.body.empty?
response.json["data"].map do |image|
image.slice("animated", "cover", "datetime", "description", "gifv", "height", "id", "images_count", "is_album", "nsfw", "score", "size", "title", "type", "width")
Expand Down
1 change: 0 additions & 1 deletion app/services/google.rb
@@ -1,5 +1,4 @@
# frozen_string_literal: true
# https://developers.google.com/youtube/v3/docs/

module App
class GoogleError < HTTPError; end
Expand Down
29 changes: 29 additions & 0 deletions app/services/youtube.rb
@@ -0,0 +1,29 @@
# frozen_string_literal: true
# https://developers.google.com/youtube/v3/docs/

module App
  # YouTube Data API v3 client (see BASE_URL) with a helper that detects
  # whether a video is a short.
  class YouTube < Google
    BASE_URL = "https://www.googleapis.com/youtube/v3"

    # Tells whether +video_id+ is a short by requesting
    # https://www.youtube.com/shorts/<video_id> and checking for a 200 status.
    # The verdict goes through App::Cache as the string "1" (short) or "0".
    # NOTE(review): the 7*24*60*60 and 60 arguments look like cache lifetimes
    # in seconds -- confirm against App::Cache.
    #
    # Any error raised while performing the request is re-raised as
    # ERROR_CLASS.
    def self.is_short?(video_id)
      cached_flag, _rest = App::Cache.cache("youtube.shorts", video_id, 7*24*60*60, 60) do
        shorts_uri = Addressable::URI.parse("https://www.youtube.com/shorts/#{video_id}")
        connection_options = {
          use_ssl: shorts_uri.scheme == "https",
          open_timeout: 10,
          read_timeout: 10,
        }
        Net::HTTP.start(shorts_uri.host, shorts_uri.port, connection_options) do |http|
          reply = http.request_get(shorts_uri.request_uri)
          $metrics[:requests_total].increment(labels: { service: "youtube", response_code: reply.code })
          # Only a 200 counts as a short; every other status is stored as "0".
          reply.code == "200" ? "1" : "0"
        end
      rescue => e
        raise(self::ERROR_CLASS, e)
      end

      cached_flag == "1"
    end
  end
end
8 changes: 8 additions & 0 deletions config/initializers/05-bool.rb
Expand Up @@ -8,6 +8,10 @@ def <=>(other)
return 0
end
end

# Numeric form of this boolean: always 0.
def to_i
  0
end
end

class TrueClass
Expand All @@ -18,4 +22,8 @@ def <=>(other)
return 0
end
end

# Numeric form of true: always 1.
def to_i
  1
end
end
53 changes: 44 additions & 9 deletions public/js/main.js
Expand Up @@ -130,12 +130,23 @@ $(document).ready(async function() {
});
});

let shiftKey = false;
// Mirror the shift key's pressed state on every key press and release.
const rememberShiftState = (e) => {
  shiftKey = e.shiftKey;
};
for (const eventName of ["keydown", "keyup"]) {
  document.addEventListener(eventName, rememberShiftState);
}

$("#services form").submit(async function(event) {
event.preventDefault();

const form = $(this);
const action = form.attr("action");
const qs = form.serialize();
let qs = form.serialize();
if (shiftKey) {
qs += "&shift";
}
const submit = form.find('input[type="submit"]');
const submit_value = submit.attr("value");
submit.attr("value", "Working...");
Expand Down Expand Up @@ -172,7 +183,7 @@ $(document).ready(async function() {
if (pathname.endsWith("/")) {
pathname = pathname.substring(0, pathname.length-1);
}
url = `${window.location.protocol}//${window.location.host}${pathname}${data}`;
url = `${window.location.origin}${pathname}${data}`;
// initiate a request just to get a head start on resolving urls
fetch(url);
} else {
Expand All @@ -181,6 +192,11 @@ $(document).ready(async function() {
}
}

// Normalize URL
const uri = new URL(url);
uri.search = uri.searchParams.toString();
url = uri.toString();

const feed_modal = $("#feed-modal");
const feed_url = $("#feed-url");
feed_url.val(url).trigger("input");
Expand All @@ -197,7 +213,16 @@ $(document).ready(async function() {
const url = $("#feed-url").val();
console.log(url);
modal.find("form").hide();
modal.find(`#${action}-options`).show().attr("action", url).trigger("change");
if (url.startsWith(window.location.origin)) {
if (action === "youtube") {
const uri = new URL(url);
const q = uri.searchParams.get("q");
if (q) {
$("#youtube_title_filter").val(q);
}
}
modal.find(`#${action}-options`).show().attr("action", url).trigger("change");
}
});

$("#copy-button").click(function() {
Expand All @@ -216,14 +241,24 @@ $(document).ready(async function() {
return false;
});

$("#feed-modal form").change(function() {
$("#feed-modal form").on("input", function(e) {
const form = $(this);
const qs = $.param(form.serializeArray().filter(input => input.value !== ""));
let url = form.attr("action");
if (qs !== "") {
url += `?${qs}`;
const uri = new URL(form.attr("action"));
const inputs = form.serializeArray();
for (const input of inputs) {
if (input.value === "") {
if (uri.searchParams.has(input.name)) {
uri.searchParams.delete(input.name);
}
continue;
}
uri.searchParams.set(input.name, input.value);
}
const url = uri.toString();
$("#feed-url").val(url).trigger("input");
if (e.target.tagName !== "INPUT" || e.target.type !== "text") {
$("#feed-url").select();
}
$("#feed-url").val(url).trigger("input").select();
});

$("[data-download-filename]").click(async function() {
Expand Down
26 changes: 26 additions & 0 deletions views/index.erb
Expand Up @@ -221,6 +221,32 @@
</div>
</form>

<%# Feed options for YouTube. public/js/main.js shows this form for the "youtube" action and writes each non-empty field into the feed URL's query string; the "q" text field is pre-filled from the URL's existing q parameter. %>
<form id="youtube-options" method="get">
<div class="form-check">
<label class="form-check-label">
<input class="form-check-input" type="checkbox" name="shorts" value="0"> Filter out shorts
</label>
</div>
<div class="form-check">
<label class="form-check-label">
<input class="form-check-input" type="checkbox" name="min_length" value="1m"> Minimum video length
</label>
</div>
<div class="form-check">
<label class="form-check-label">
<input class="form-check-input" type="checkbox" name="eventType" value="live,upcoming"> Live and upcoming streams
</label>
</div>
<div class="row gx-2 mt-2">
<div class="col-auto">
<label class="col-form-label" for="youtube_title_filter">Filter by video title</label>
</div>
<div class="col-auto">
<input type="text" id="youtube_title_filter" name="q" class="form-control">
</div>
</div>
</form>

<form id="instagram-options" method="get">
<div class="form-check">
<label class="form-check-label">
Expand Down

0 comments on commit 55b5fa8

Please sign in to comment.