In [35]:
import os
import google.auth
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import pandas as pd
import numpy as np

# Scopes for accessing YouTube Data API
SCOPES = ['https://www.googleapis.com/auth/youtube.readonly']

# Function to authenticate and build the YouTube API client
def get_youtube_service():
    # Check for existing credentials (token.json) to avoid re-authentication
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    
    # If credentials are not available or expired, authenticate via OAuth
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(google.auth.transport.requests.Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'client_secret_702664335613-fsbgmoj55a6flun7pfd7i2p5q18snht3.apps.googleusercontent.com.json', SCOPES)
            creds = flow.run_local_server(port=0)
        
        # Save credentials for future use
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    # Build the YouTube API client
    return build('youtube', 'v3', credentials=creds)

# Function to get the list of YouTube subscriptions
def get_subscriptions(youtube):
    try:
        # Call the API to retrieve the list of subscriptions (channels you follow)
        request = youtube.subscriptions().list(
            part="snippet",
            mine=True,  # Retrieves subscriptions for the authenticated user
            maxResults=50  # You can change this as needed
        )
        
        response = request.execute()

        channel_titles = []
        channel_ids = []

        # Print out channel names and URLs
        for item in response['items']:
            channel_title = item['snippet']['title']
            channel_id = item['snippet']['resourceId']['channelId']
            channel_ids.append(channel_id)
            channel_titles.append(channel_title)
            channel_url = f"https://www.youtube.com/channel/{channel_id}"
            #print(f"Channel Name: {channel_title}, Channel ID: {channel_id}, URL: {channel_url}")

        return(channel_titles,channel_ids)
    
    except HttpError as e:
        print(f"An error occurred: {e}")

# Main function to get YouTube channels you follow
youtube_service = get_youtube_service()
channel_title_list,channel_id_list = get_subscriptions(youtube_service)
#print('channel_title_list[:5]:',channel_title_list[:5],'\nchannel_id_list[:5]:',channel_id_list[:5])
channels_df = pd.DataFrame(list(zip(channel_title_list,channel_id_list)),columns=['channel_name','channel_id'])
channels_df.head()

channel_title_list[:5]: ['Late Night with Seth Meyers', 'BibleProject', 'Robert Reich', 'Animated Physics Werner Maurer', 'Spooky Scary Socialist'] 
channel_id_list[:5]: ['UCVTyTA7-g9nopHeHbeuvpRA', 'UCVfwlh9XpX2Y_tQfjeln9QA', 'UCuDv5p8E-evaRSh542hDV5g', 'UCGbUeWAgo3oXBBsNzPYa3Wg', 'UCl_A_42M6kvjH8Gr-rwfCUw']


Unnamed: 0,channel_name,channel_id
0,Late Night with Seth Meyers,UCVTyTA7-g9nopHeHbeuvpRA
1,BibleProject,UCVfwlh9XpX2Y_tQfjeln9QA
2,Robert Reich,UCuDv5p8E-evaRSh542hDV5g
3,Animated Physics Werner Maurer,UCGbUeWAgo3oXBBsNzPYa3Wg
4,Spooky Scary Socialist,UCl_A_42M6kvjH8Gr-rwfCUw


In [37]:
# Function to get the channel category (topics) from a channel ID
def get_channel_category(youtube, channel_id):
    try:
        # Call the API to get channel details, including topicDetails
        request = youtube.channels().list(
            part="snippet,topicDetails",
            id=channel_id
        )
        response = request.execute()

        topiclist = []

        # Extract category/topic details
        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            channel_title = channel_info['snippet']['title']

            # Check if topicDetails are available
            if 'topicDetails' in channel_info:
                topics = channel_info['topicDetails']['topicCategories']
                #print(f"Channel Name: {channel_title}")
                #print("Categories/Topics associated with this channel:")
                topiclist = []
                for topic in topics:
                    #print(topic)
                    topiclist.append(topic.split('/')[-1])
            return(topiclist)
        else:
            print("Channel not found.")
    except HttpError as e:
        print(f"An error occurred: {e}")


channel_topics_list = []
for id_ in channel_id_list:
    channel_topics_list.append(get_channel_category(youtube_service, id_))

print('channel_title_list[:5]:',channel_title_list[:5],'\nchannel_topics_list[:5]:',channel_topics_list[:5])

channel_title_list[:5]: ['Late Night with Seth Meyers', 'BibleProject', 'Robert Reich', 'Animated Physics Werner Maurer', 'Spooky Scary Socialist'] 
channel_topics_list[:5]: [['Society', 'Entertainment', 'Television_program', 'Politics', 'Film'], ['Religion', 'Society'], ['Politics', 'Society'], ['Knowledge'], ['Society', 'Politics']]


In [38]:
# get more in depth info about a channel
# def get_channel_info(youtube, channel_id):
#     try:
#         # Call the API to get channel details
#         request = youtube.channels().list(
#             part="snippet,brandingSettings,contentDetails,statistics,topicDetails",
#             id=channel_id
#         )
#         response = request.execute()

#         # Extract information from the response
#         if 'items' in response and len(response['items']) > 0:
#             channel_info = response['items'][0]

#             # Snippet details
#             channel_title = channel_info['snippet']['title']
#             description = channel_info['snippet']['description']
#             country = channel_info['snippet'].get('country', 'N/A')
#             print(f"Channel Title: {channel_title}")
#             print(f"Description: {description}")
#             print(f"Country: {country}")

#             # Branding settings
#             if 'brandingSettings' in channel_info:
#                 branding = channel_info['brandingSettings']['channel']
#                 branding_description = branding.get('description', 'N/A')
#                 keywords = branding.get('keywords', 'N/A')
#                 print(f"Branding Description: {branding_description}")
#                 print(f"Keywords: {keywords}")

#             # Content details
#             if 'contentDetails' in channel_info:
#                 uploads_playlist = channel_info['contentDetails']['relatedPlaylists']['uploads']
#                 print(f"Uploads Playlist ID: {uploads_playlist}")

#             # Statistics
#             if 'statistics' in channel_info:
#                 subscriber_count = channel_info['statistics'].get('subscriberCount', 'N/A')
#                 view_count = channel_info['statistics'].get('viewCount', 'N/A')
#                 video_count = channel_info['statistics'].get('videoCount', 'N/A')
#                 print(f"Subscribers: {subscriber_count}")
#                 print(f"Total Views: {view_count}")
#                 print(f"Total Videos: {video_count}")

#             # Topic details
#             if 'topicDetails' in channel_info:
#                 topics = channel_info['topicDetails'].get('topicCategories', [])
#                 print("Categories/Topics associated with this channel:")
#                 for topic in topics:
#                     print(topic)

#         else:
#             print("Channel not found.")
#     except HttpError as e:
#         print(f"An error occurred: {e}")


# get_channel_info(youtube_service, channel_id)

In [40]:
channel_topis_dict = dict(zip(channel_id_list, channel_topics_list))

# send dict to chatGPT using a query like follows:
# this is a dictionary of channel-id's of different channels and topics that are associated with the respective channel. Using these topics i would like to uniquely classify the channel-id's into a handful of groups/classes. Could you output a dictionary with the class names and the channel-id's, where each channel id is classified into at least but also not more than one group?

In [49]:
# response from chatGPT:
classified_channels = {
    "Society & Culture": [
        'UCVfwlh9XpX2Y_tQfjeln9QA', 
        'UCuDv5p8E-evaRSh542hDV5g', 
        'UCNvsIonJdJ5E4EXMa65VYpA', 
        'UCrr7y8rEXb7_RiVniwvzk9w', 
        'UCqpg8i6CryfkIw0uqytHsow', 
        'UC2PA-AKmVpU6NKCGtZq_rKQ', 
        'UCG1uayRlzz3ahT8ISRdyw7Q', 
        'UCaN8DZdc8EHo5y1LsQWMiig'
    ],
    "Politics & Current Affairs": [
        'UCmGSJVG3mCRXVOP4yZrU1Dw', 
        'UCvlj0IzjSnNoduQF0l3VGng', 
        'UCJ6o36XL0CpYb6U5dNBiXHQ', 
        'UCVkSF37pPXkZbElFjBwUsEA',
        'UCT8fbPYycfhjYzLbsU1dl3Q', 
        'UC3XTzVzaHQEd30rQbuvCtTQ', 
        'UCl_A_42M6kvjH8Gr-rwfCUw'
    ],
    "Entertainment": [
        'UCVTyTA7-g9nopHeHbeuvpRA', 
        'UCNNEMxGKV1LsKZRt4vaIbvw', 
        'UCjhkuC_Pi85wGjnB0I1ydxw', 
        'UCT8fbPYycfhjYzLbsU1dl3Q', 
        'UCtGoikgbxP4F3rgI9PldI9g'
    ],
    "Lifestyle & Food": [
        'UCE3yZjxDg3iI91bcNJDFnsg', 
        'UCzH5n3Ih5kgQoiDAQt2FwLw', 
        'UCopxVPFM021dpp8L6euX-qA', 
        'UCDq5v10l4wkV5-ZBIJJFbzQ', 
        'UCcl83sSKVJpPLylAvXYTM7Q', 
        'UCcLCX8VIcNWIu6BJyjWQDww', 
        'UC5fdssPqmmGhkhsJi4VcckA'
    ],
    "Religion & Spirituality": [
        'UCVfwlh9XpX2Y_tQfjeln9QA', 
        'UCG1uayRlzz3ahT8ISRdyw7Q', 
        'UCocP40a_UvRkUAPLD5ezLIQ'
    ],
    "Knowledge & Education": [
        'UCGbUeWAgo3oXBBsNzPYa3Wg', 
        'UCUR79QVQcE7mgaPJ0oiOYkA'
    ],
    "Gaming": [
        'UClt01z1wHHT7c5lKcU8pxRQ', 
        'UCRzpegmIukwzuSbMcJ3hPZg'
    ]
}


In [104]:
a = pd.Series(classified_channels)
l = a.str.len()
classified_channels_df = pd.DataFrame({'channel_id': np.concatenate(a.values), 'class':np.repeat(a.index.values, l)}).drop_duplicates('channel_id').reset_index(drop=True)
classified_channels_df.head()

Unnamed: 0,channel_id,class
0,UCVfwlh9XpX2Y_tQfjeln9QA,Society & Culture
1,UCuDv5p8E-evaRSh542hDV5g,Society & Culture
2,UCNvsIonJdJ5E4EXMa65VYpA,Society & Culture
3,UCrr7y8rEXb7_RiVniwvzk9w,Society & Culture
4,UCqpg8i6CryfkIw0uqytHsow,Society & Culture


In [110]:
classified_channel_ids_df = pd.concat([channels_df.set_index('channel_id'),classified_channels_df.set_index('channel_id')], axis=1, join='inner')
classified_channel_ids_df.head()

Unnamed: 0_level_0,channel_name,class
channel_id,Unnamed: 1_level_1,Unnamed: 2_level_1
UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment
UCVfwlh9XpX2Y_tQfjeln9QA,BibleProject,Society & Culture
UCuDv5p8E-evaRSh542hDV5g,Robert Reich,Society & Culture
UCGbUeWAgo3oXBBsNzPYa3Wg,Animated Physics Werner Maurer,Knowledge & Education
UCl_A_42M6kvjH8Gr-rwfCUw,Spooky Scary Socialist,Politics & Current Affairs


In [107]:
def get_video_ids_from_channel(youtube, channel_id):
    # URL to get the uploads playlist ID for the given channel
    channel_response = youtube.channels().list(
        part="contentDetails",
        id=channel_id
    ).execute()

    # Extract the uploads playlist ID
    uploads_playlist_id = channel_response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
    
    # Now get the video IDs from the uploads playlist
    videos = []
    next_page_token = None
    
    while True:
        playlist_items_response = youtube.playlistItems().list(
            part="snippet",
            playlistId=uploads_playlist_id,
            maxResults=50,
            pageToken=next_page_token
        ).execute()

        items = playlist_items_response.get('items', [])
        for item in items:
            #video_id = item['snippet']['resourceId']['videoId']
            #videos.append(video_id)

            video_id = item['snippet']['resourceId']['videoId']
            video_title = item['snippet']['title']
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            # Access the thumbnails dictionary for different image sizes
            thumbnails = item['snippet']['thumbnails']
            # Get the 'high' quality thumbnail (you can also choose 'default' or 'medium')
            thumbnail_url = thumbnails['high']['url'] if 'high' in thumbnails else thumbnails['default']['url']
            video_date = item['snippet']['publishedAt']
            
            videos.append({
                'channel_id': channel_id,
                'video_id': video_id,
                'video_title': video_title,
                'video_url': video_url,
                'video_date': video_date,
                'thumbnail_url': thumbnail_url
            })

        # Check for the next page
        next_page_token = playlist_items_response.get('nextPageToken')
        if not next_page_token:
            break

    return videos



In [108]:
# Get authenticated service
#youtube = get_authenticated_service()
video_id_list = []
for channel_id in channel_id_list:
    # Get all video IDs from the specified channel
    video_ids = get_video_ids_from_channel(youtube_service, channel_id)

    # Output the video IDs
    #print("Video IDs:")
    for video_id in video_ids:
        #print(video_id)
        video_id_list.append(video_id)

Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:
Video IDs:


In [109]:
videos_df = pd.DataFrame(video_id_list)
videos_df

Unnamed: 0,channel_id,video_id,video_title,video_url,video_date,thumbnail_url
0,UCVTyTA7-g9nopHeHbeuvpRA,wnFnPmngglI,"Tesla Recalls Cybertrucks, Walz and JD Vance D...",https://www.youtube.com/watch?v=wnFnPmngglI,2024-10-04T19:18:29Z,https://i.ytimg.com/vi/wnFnPmngglI/hqdefault.jpg
1,UCVTyTA7-g9nopHeHbeuvpRA,NMV9iWc2Gis,Is Eric Adams getting his talking points from ...,https://www.youtube.com/watch?v=NMV9iWc2Gis,2024-10-04T17:30:03Z,https://i.ytimg.com/vi/NMV9iWc2Gis/hqdefault.jpg
2,UCVTyTA7-g9nopHeHbeuvpRA,fbxLY_BRsRM,Paul Reiser Wrote a Comic Book Series Centered...,https://www.youtube.com/watch?v=fbxLY_BRsRM,2024-10-04T11:00:52Z,https://i.ytimg.com/vi/fbxLY_BRsRM/hqdefault.jpg
3,UCVTyTA7-g9nopHeHbeuvpRA,y_HkrMhN7Pg,Nate Bargatze Talks Healthy Relationship with ...,https://www.youtube.com/watch?v=y_HkrMhN7Pg,2024-10-04T10:01:08Z,https://i.ytimg.com/vi/y_HkrMhN7Pg/hqdefault.jpg
4,UCVTyTA7-g9nopHeHbeuvpRA,ZKSyN-oxegc,Nate Bargatze Read Heidi Gardner's Lines Durin...,https://www.youtube.com/watch?v=ZKSyN-oxegc,2024-10-04T10:00:38Z,https://i.ytimg.com/vi/ZKSyN-oxegc/hqdefault.jpg
...,...,...,...,...,...,...
22288,UCaN8DZdc8EHo5y1LsQWMiig,0CtO8QOhVzI,Hayao Miyazaki and the Art of Ambivalence | Bi...,https://www.youtube.com/watch?v=0CtO8QOhVzI,2017-08-11T18:17:17Z,https://i.ytimg.com/vi/0CtO8QOhVzI/hqdefault.jpg
22289,UCaN8DZdc8EHo5y1LsQWMiig,tuxCaXpKFJU,Coppercab: A Deep Dive into a Youtube Troll | ...,https://www.youtube.com/watch?v=tuxCaXpKFJU,2017-08-04T21:32:33Z,https://i.ytimg.com/vi/tuxCaXpKFJU/hqdefault.jpg
22290,UCaN8DZdc8EHo5y1LsQWMiig,5f_ReI8kxNw,Pixar and the Obsolete | Big Joel,https://www.youtube.com/watch?v=5f_ReI8kxNw,2017-07-28T18:36:06Z,https://i.ytimg.com/vi/5f_ReI8kxNw/hqdefault.jpg
22291,UCaN8DZdc8EHo5y1LsQWMiig,DhBUdvSm_54,Does Moana Work? | Big Joel,https://www.youtube.com/watch?v=DhBUdvSm_54,2017-07-21T20:43:05Z,https://i.ytimg.com/vi/DhBUdvSm_54/hqdefault.jpg


In [121]:
c = classified_channel_ids_df.reset_index()
classified_videos_df = pd.merge(c,videos_df, how='right', left_on='channel_id',right_on='channel_id')
classified_videos_df.sort_values(['video_date'],ascending=True)
classified_videos_df.head(5)
#lassified_channel_ids_df

Unnamed: 0,channel_id,channel_name,class,video_id,video_title,video_url,video_date,thumbnail_url
0,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,wnFnPmngglI,"Tesla Recalls Cybertrucks, Walz and JD Vance D...",https://www.youtube.com/watch?v=wnFnPmngglI,2024-10-04T19:18:29Z,https://i.ytimg.com/vi/wnFnPmngglI/hqdefault.jpg
1,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,NMV9iWc2Gis,Is Eric Adams getting his talking points from ...,https://www.youtube.com/watch?v=NMV9iWc2Gis,2024-10-04T17:30:03Z,https://i.ytimg.com/vi/NMV9iWc2Gis/hqdefault.jpg
2,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,fbxLY_BRsRM,Paul Reiser Wrote a Comic Book Series Centered...,https://www.youtube.com/watch?v=fbxLY_BRsRM,2024-10-04T11:00:52Z,https://i.ytimg.com/vi/fbxLY_BRsRM/hqdefault.jpg
3,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,y_HkrMhN7Pg,Nate Bargatze Talks Healthy Relationship with ...,https://www.youtube.com/watch?v=y_HkrMhN7Pg,2024-10-04T10:01:08Z,https://i.ytimg.com/vi/y_HkrMhN7Pg/hqdefault.jpg
4,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,ZKSyN-oxegc,Nate Bargatze Read Heidi Gardner's Lines Durin...,https://www.youtube.com/watch?v=ZKSyN-oxegc,2024-10-04T10:00:38Z,https://i.ytimg.com/vi/ZKSyN-oxegc/hqdefault.jpg


In [None]:
# find way to get watched flag on each video

In [36]:
classified_videos_df = classified_videos_df.sort_values('video_date',ascending = False)
classified_videos_df.reset_index(inplace=True,drop=True)
classified_videos_df['id'] = classified_videos_df.index
classified_videos_df.drop(columns=['level_0','index'],inplace=True)
classified_videos_df

Unnamed: 0,channel_id,channel_name,class,video_id,video_title,video_url,video_date,thumbnail_url,id
0,UCVTyTA7-g9nopHeHbeuvpRA,Late Night with Seth Meyers,Entertainment,4lbgnWP-zw8,Why People Leave Trump's Rallies Early; Elon M...,https://www.youtube.com/watch?v=4lbgnWP-zw8,2024-10-08 01:00:05+00:00,https://i.ytimg.com/vi/4lbgnWP-zw8/hqdefault.jpg,0
1,UCuDv5p8E-evaRSh542hDV5g,Robert Reich,Society & Culture,zbh9SW5Cgqg,How Trump Killed Every Business He Touched | R...,https://www.youtube.com/watch?v=zbh9SW5Cgqg,2024-10-07 22:23:13+00:00,https://i.ytimg.com/vi/zbh9SW5Cgqg/hqdefault.jpg,1
2,UCmGSJVG3mCRXVOP4yZrU1Dw,Johnny Harris,Politics & Current Affairs,JdyJBbRPBM0,Why Russia is Building Bases in the Arctic,https://www.youtube.com/watch?v=JdyJBbRPBM0,2024-10-07 19:04:00+00:00,https://i.ytimg.com/vi/JdyJBbRPBM0/hqdefault.jpg,2
3,UCVfwlh9XpX2Y_tQfjeln9QA,BibleProject,Society & Culture,PqEiqCuIsvw,Where Wisdom for Healthy Relationships Begins,https://www.youtube.com/watch?v=PqEiqCuIsvw,2024-10-07 17:00:16+00:00,https://i.ytimg.com/vi/PqEiqCuIsvw/hqdefault.jpg,3
4,UCuDv5p8E-evaRSh542hDV5g,Robert Reich,Society & Culture,uYpqDtI-juo,"“It's the economy, stupid."" Here are 5 ways Tr...",https://www.youtube.com/watch?v=uYpqDtI-juo,2024-10-07 14:10:21+00:00,https://i.ytimg.com/vi/uYpqDtI-juo/hqdefault.jpg,4
...,...,...,...,...,...,...,...,...,...
22286,UCGbUeWAgo3oXBBsNzPYa3Wg,Animated Physics Werner Maurer,Knowledge & Education,6DyI4h_tnvI,Physik der Bowling-Kugel,https://www.youtube.com/watch?v=6DyI4h_tnvI,2010-08-05 10:04:13+00:00,https://i.ytimg.com/vi/6DyI4h_tnvI/hqdefault.jpg,22286
22287,UCGbUeWAgo3oXBBsNzPYa3Wg,Animated Physics Werner Maurer,Knowledge & Education,mOW3CYvOVn0,Newton's Cradle 2,https://www.youtube.com/watch?v=mOW3CYvOVn0,2010-06-15 05:40:53+00:00,https://i.ytimg.com/vi/mOW3CYvOVn0/hqdefault.jpg,22287
22288,UCGbUeWAgo3oXBBsNzPYa3Wg,Animated Physics Werner Maurer,Knowledge & Education,dCTo53kE3gs,Newton's Cradle - small and big spheres,https://www.youtube.com/watch?v=dCTo53kE3gs,2010-06-11 09:55:35+00:00,https://i.ytimg.com/vi/dCTo53kE3gs/hqdefault.jpg,22288
22289,UCGbUeWAgo3oXBBsNzPYa3Wg,Animated Physics Werner Maurer,Knowledge & Education,-MAqHqwoa4U,magic spheres,https://www.youtube.com/watch?v=-MAqHqwoa4U,2009-10-02 09:59:18+00:00,https://i.ytimg.com/vi/-MAqHqwoa4U/hqdefault.jpg,22289


In [37]:
# write table to sqlite (?) database
import sqlite3
# Create your connection.
cnx = sqlite3.connect('subscribed_videos2.sqlite3')

classified_videos_df.to_sql(name='subscribed_videos', con=cnx)

22291

In [7]:
# get data from sqlite

import pandas as pd
import sqlite3

# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("subscribed_videos2.sqlite3")
classified_videos_df = pd.read_sql_query("SELECT * from subscribed_videos", con)

In [10]:
classified_videos_df['id'] = classified_videos_df['index']

In [21]:
lista = [{'id':'hpo','name':'globi'},{'id':'grosz','name':'globi2'}]
lista

[{'id': 'hpo', 'name': 'globi'}, {'id': 'grosz', 'name': 'globi2'}]

In [19]:
lista

{'id': 'hpo', 'as': 'fff'}

In [23]:
list({x['id'] for x in lista})

['hpo', 'grosz']