Skip to content

Commit

Permalink
feat: support parsing TweetTombstone type
Browse files Browse the repository at this point in the history
  • Loading branch information
prinsss committed Jan 1, 2024
1 parent 401a5c7 commit 0b51273
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 33 deletions.
6 changes: 3 additions & 3 deletions src/components/table/columns-tweet.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
formatTwitterImage,
getMediaOriginalUrl,
getProfileImageOriginalUrl,
extractTweetWithVisibility,
extractTweetUnion,
extractQuotedTweet,
extractTweetFullText,
} from '@/utils/api';
Expand Down Expand Up @@ -227,13 +227,13 @@ export const columns = [
exportHeader: 'Quote Source',
exportValue: (row) => {
const res = row.original.quoted_status_result?.result;
return res ? extractTweetWithVisibility(res)?.rest_id : undefined;
return res ? extractTweetUnion(res)?.rest_id : undefined;
},
},
header: () => <span>Quote Source</span>,
cell: (info) => {
const res = info.row.original.quoted_status_result?.result;
const source = res ? extractTweetWithVisibility(res) : null;
const source = res ? extractTweetUnion(res) : null;
return (
<p class="whitespace-pre">
{source ? (
Expand Down
11 changes: 7 additions & 4 deletions src/modules/tweet-detail/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ export const TweetDetailInterceptor: Interceptor = (req, res) => {
for (const entry of timelineAddEntriesInstructionEntries) {
// The main tweet.
if (isTimelineEntryTweet(entry)) {
newData.push(extractTimelineTweet(entry.content.itemContent));
const tweet = extractTimelineTweet(entry.content.itemContent);
if (tweet) {
newData.push(tweet);
}
}

// The conversation thread.
Expand All @@ -71,9 +74,9 @@ export const TweetDetailInterceptor: Interceptor = (req, res) => {
) as TimelineAddToModuleInstruction<TimelineTweet>;

if (timelineAddToModuleInstruction) {
const tweetsInConversation = timelineAddToModuleInstruction.moduleItems.map((i) =>
extractTimelineTweet(i.item.itemContent),
);
const tweetsInConversation = timelineAddToModuleInstruction.moduleItems
.map((i) => extractTimelineTweet(i.item.itemContent))
.filter((t): t is Tweet => !!t);

newData.push(...tweetsInConversation);
}
Expand Down
18 changes: 11 additions & 7 deletions src/modules/user-tweets/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ export const UserTweetsInterceptor: Interceptor = (req, res) => {
) as TimelinePinEntryInstruction;

if (timelinePinEntryInstruction) {
newData.push(
extractTimelineTweet(timelinePinEntryInstruction.entry.content.itemContent),
);
const tweet = extractTimelineTweet(timelinePinEntryInstruction.entry.content.itemContent);
if (tweet) {
newData.push(tweet);
}
}

// Normal tweets.
Expand All @@ -65,14 +66,17 @@ export const UserTweetsInterceptor: Interceptor = (req, res) => {
for (const entry of timelineAddEntriesInstruction.entries) {
// Extract normal tweets.
if (isTimelineEntryTweet(entry)) {
newData.push(extractTimelineTweet(entry.content.itemContent));
const tweet = extractTimelineTweet(entry.content.itemContent);
if (tweet) {
newData.push(tweet);
}
}

// Extract conversations.
if (isTimelineEntryProfileConversation(entry)) {
const tweetsInConversation = entry.content.items.map((i) =>
extractTimelineTweet(i.item.itemContent),
);
const tweetsInConversation = entry.content.items
.map((i) => extractTimelineTweet(i.item.itemContent))
.filter((t): t is Tweet => !!t);

newData.push(...tweetsInConversation);
}
Expand Down
30 changes: 26 additions & 4 deletions src/types/tweet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ export interface TimelineTweet {
itemType: 'TimelineTweet';
__typename: 'TimelineTweet';
tweet_results: {
result: TweetWithVisibilityResults | Tweet;
result: TweetUnion;
};
tweetDisplayType: 'Tweet' | 'SelfThread';
hasModeratedReplies?: boolean;
socialContext?: unknown;
}

// Every shape a tweet result is modeled as, discriminated by `__typename`.
// Only `Tweet` and `TweetWithVisibilityResults` (via its `tweet` field) carry
// tweet data; `extractTweetUnion` in src/utils/api.ts returns null for the
// tombstone and unavailable variants, and for any unrecognized `__typename`.
export type TweetUnion = Tweet | TweetWithVisibilityResults | TweetTombstone | TweetUnavailable;

export interface TweetWithVisibilityResults {
__typename: 'TweetWithVisibilityResults';
limitedActionResults: {
Expand All @@ -23,6 +25,26 @@ export interface TweetWithVisibilityResults {
tweet: Tweet;
}

// Placeholder result returned in place of a tweet that has been deleted or
// that belongs to a protected account. It carries no tweet payload, only the
// tombstone notice below.
// See: https://github.com/JustAnotherArchivist/snscrape/issues/392
export interface TweetTombstone {
// Discriminant used to tell this variant apart from the other tweet results.
__typename: 'TweetTombstone';
tombstone: {
__typename: 'TextTombstone';
text: {
// Presumably whether the notice is rendered right-to-left — TODO confirm.
rtl: boolean;
// Presumably the human-readable notice shown instead of the tweet — TODO confirm.
text: string;
// Entity shape is not modeled here; narrow before use.
entities: unknown[];
};
};
}

// Placeholder result for a tweet that is unavailable for some reason
// (possibly because it is NSFW). Only the discriminant is modeled here.
// See: https://github.com/JustAnotherArchivist/snscrape/issues/433
export interface TweetUnavailable {
// Discriminant used to tell this variant apart from the other tweet results.
__typename: 'TweetUnavailable';
}

export interface Tweet {
__typename: 'Tweet';
rest_id: string;
Expand All @@ -43,10 +65,10 @@ export interface Tweet {
};
is_translatable: boolean;
quoted_status_result?: {
result: TweetWithVisibilityResults | Tweet;
result: TweetUnion;
};
quotedRefResult?: {
result: Partial<TweetWithVisibilityResults | Tweet>;
result: Partial<TweetUnion>;
};
views: {
count: string;
Expand Down Expand Up @@ -94,7 +116,7 @@ export interface Tweet {
user_id_str: string;
id_str: string;
retweeted_status_result?: {
result: TweetWithVisibilityResults | Tweet;
result: TweetUnion;
};
};
}
Expand Down
42 changes: 27 additions & 15 deletions src/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
TimelineTweet,
TimelineUser,
Tweet,
TweetWithVisibilityResults,
TweetUnion,
User,
} from '@/types';
import logger from './logger';
Expand All @@ -28,7 +28,7 @@ export function extractDataFromResponse<
>(
response: XMLHttpRequest,
extractInstructionsFromJson: (json: R) => TimelineInstructions,
extractDataFromTimelineEntry: (entry: TimelineEntry<P, TimelineTimelineItem<P>>) => T | undefined,
extractDataFromTimelineEntry: (entry: TimelineEntry<P, TimelineTimelineItem<P>>) => T | null,
): T[] {
const json: R = JSON.parse(response.responseText);
const instructions = extractInstructionsFromJson(json);
Expand All @@ -55,8 +55,8 @@ export function extractDataFromResponse<
* Tweets with visibility limitation have an additional layer of nesting.
* Extract the real tweet object from the wrapper.
*/
export function extractTimelineTweet(itemContent: TimelineTweet): Tweet {
return extractTweetWithVisibility(itemContent.tweet_results.result);
export function extractTimelineTweet(itemContent: TimelineTweet): Tweet | null {
return extractTweetUnion(itemContent.tweet_results.result);
}

/*
Expand Down Expand Up @@ -127,38 +127,50 @@ export function isTimelineEntryProfileConversation(
|--------------------------------------------------------------------------
*/

export function extractTweetWithVisibility(tweet: Tweet | TweetWithVisibilityResults): Tweet {
export function extractTweetUnion(tweet: TweetUnion): Tweet | null {
if (tweet?.__typename === 'Tweet') {
return tweet;
}

if (tweet?.__typename === 'TweetWithVisibilityResults') {
return tweet.tweet;
}

return tweet;
if (tweet?.__typename === 'TweetTombstone') {
logger.info(
'TweetTombstone received. The tweet may be deleted or from a protected account.',
tweet,
);
return null;
}

if (tweet?.__typename === 'TweetUnavailable') {
logger.info('TweetUnavailable received. The tweet may be NSFW.', tweet);
return null;
}

logger.error('Unknown tweet type received. Please report this issue.', tweet);
return null;
}

export function extractRetweetedTweet(tweet: Tweet): Tweet | null {
if (tweet.legacy.retweeted_status_result?.result) {
return extractTweetWithVisibility(tweet.legacy.retweeted_status_result.result);
return extractTweetUnion(tweet.legacy.retweeted_status_result.result);
}

return null;
}

export function extractQuotedTweet(tweet: Tweet): Tweet | null {
if (tweet.quoted_status_result?.result) {
return extractTweetWithVisibility(tweet.quoted_status_result.result);
return extractTweetUnion(tweet.quoted_status_result.result);
}

return null;
}

export function extractTweetUserScreenName(tweet: Tweet): string {
try {
return tweet.core.user_results.result.legacy.screen_name;
} catch (err) {
console.log(tweet);
logger.error('Failed to extract tweet user screen name', err, tweet);
return 'READ_ERROR';
}
return tweet.core.user_results.result.legacy.screen_name;
}

export function extractTweetMedia(tweet: Tweet): Media[] {
Expand Down

0 comments on commit 0b51273

Please sign in to comment.