In [None]:
import requests
import pandas as pd

# GitHub token
# Read the credential from the GITHUB_TOKEN environment variable so a real token never
# has to be written into the notebook; fall back to the original placeholder when the
# variable is unset or empty.
import os

TOKEN = os.environ.get('GITHUB_TOKEN') or 'token'

# Authorization header sent with every GitHub API request made in this notebook
headers = {'Authorization': f'token {TOKEN}'}

# Function to get users in Zurich
def get_users_in_zurich():
    """Collect profile details for GitHub users in Zurich with more than 50 followers.

    Pages through the user search endpoint and, for every hit, fetches the
    user's full profile from the URL given in the search result.

    Returns:
        list[dict]: One dict per user with the keys ``login``, ``name``,
        ``company`` (stripped, upper-cased, ``@`` removed), ``location``,
        ``email``, ``hireable``, ``bio``, ``public_repos``, ``followers``,
        ``following`` and ``created_at``. If a search request fails, an error
        is printed and the users collected so far are returned.
    """
    search_url = "https://api.github.com/search/users"
    per_page = 100       # largest page size the API accepts; fewer round trips
    max_results = 1000   # the Search API only exposes the first 1000 matches
    timeout = 30         # seconds; without it a stalled connection hangs forever

    users = []
    page = 1
    while True:
        response = requests.get(
            search_url,
            headers=headers,
            params={
                'q': 'location:Zurich followers:>50',
                'per_page': per_page,
                'page': page,
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            # Use the raw text: an error body is not guaranteed to be JSON.
            print(f"Error fetching users: {response.status_code} - {response.text}")
            break

        items = response.json().get('items', [])
        if not items:
            break

        for user in items:
            detail_response = requests.get(user['url'], headers=headers, timeout=timeout)
            if detail_response.status_code != 200:
                # Skip this user instead of recording a row of blank defaults
                # built from an error payload.
                print(
                    f"Error fetching user {user.get('login', '')}: "
                    f"{detail_response.status_code} - {detail_response.text}"
                )
                continue

            user_details = detail_response.json()
            users.append({
                'login': user_details.get('login', ''),
                'name': user_details.get('name', ''),
                # company may be null in the API response, hence the `or ''`
                'company': (user_details.get('company', '') or '').strip().upper().replace('@', ''),
                'location': user_details.get('location', ''),
                'email': user_details.get('email', ''),
                'hireable': user_details.get('hireable', False),
                'bio': user_details.get('bio', ''),
                'public_repos': user_details.get('public_repos', 0),
                'followers': user_details.get('followers', 0),
                'following': user_details.get('following', 0),
                'created_at': user_details.get('created_at', ''),
            })

        # A short page is the last page; asking beyond the result cap would
        # only produce a 422 error.
        if len(items) < per_page or page * per_page >= max_results:
            break
        page += 1

    return users

# Pull every matching profile from the API, then tabulate the records
users_data = get_users_in_zurich()
users_df = pd.DataFrame(data=users_data)

# Function to get repositories for each user
def get_user_repositories(username, max_repos=500):
    """Fetch public repository metadata for one GitHub user.

    The API caps ``per_page`` at 100, so the repositories are retrieved page
    by page until ``max_repos`` rows have been collected or the user has no
    more repositories.

    Args:
        username: GitHub login whose repositories are listed.
        max_repos: Upper bound on the number of repositories returned.

    Returns:
        list[dict]: One dict per repository with the keys ``login``,
        ``full_name``, ``created_at``, ``stargazers_count``,
        ``watchers_count``, ``language``, ``has_projects``, ``has_wiki`` and
        ``license_name`` (the license ``key``, or ``''`` when there is none).
        If a request fails, an error is printed and the repositories
        collected so far are returned.
    """
    url = f"https://api.github.com/users/{username}/repos"
    per_page = 100  # maximum page size accepted by the API
    repos = []
    page = 1
    while len(repos) < max_repos:
        response = requests.get(
            url,
            headers=headers,
            params={'per_page': per_page, 'page': page},
            timeout=30,
        )
        if response.status_code != 200:
            print(
                f"Error fetching repositories for {username}: "
                f"{response.status_code} - {response.text}"
            )
            break

        batch = response.json()
        for repo in batch:
            # license is null for unlicensed repositories
            license_info = repo.get('license') or {}
            repos.append({
                'login': username,
                'full_name': repo.get('full_name', ''),
                'created_at': repo.get('created_at', ''),
                'stargazers_count': repo.get('stargazers_count', 0),
                'watchers_count': repo.get('watchers_count', 0),
                'language': repo.get('language', ''),
                'has_projects': repo.get('has_projects', False),
                'has_wiki': repo.get('has_wiki', False),
                'license_name': license_info.get('key', ''),
            })

        # A short page means there is nothing left to fetch.
        if len(batch) < per_page:
            break
        page += 1

    return repos[:max_repos]

# Collect repositories data for each user
repositories = []
# When no users were fetched, users_df has no 'login' column; .get() with a
# default avoids the KeyError that users_df['login'] would raise in that case.
for username in users_df.get('login', []):
    repositories.extend(get_user_repositories(username))

# Convert repository data to DataFrame
repositories_df = pd.DataFrame(repositories)

# Save users and repositories data to CSV (index=False keeps the row index out of the files)
users_df.to_csv('users.csv', index=False)
repositories_df.to_csv('repositories.csv', index=False)

# Create README.md file content
# The title and introduction appear once (they were previously duplicated in the output).
readme_content = """
# GitHub Users in Zurich
This project scrapes GitHub for users located in Zurich with over 50 followers and their public repositories.


1. This project analyzes GitHub users in Zurich with over 50 followers, using the GitHub API for data collection. We utilized Python's requests library to extract user profiles and repository details, examining factors like follower count, hiring status, and repository features. Data was stored in CSV files for analysis.

2. Key Findings: Users with more repositories attract more followers, hireable developers tend to share their email addresses, and enabling both projects and wikis enhances collaboration and documentation.

3. Recommendations for Developers: Create and share more public repositories to attract followers, enable wikis and project boards for better collaboration, and provide contact information to facilitate networking for job opportunities.

## Files
- **users.csv**: Contains information about users in Zurich.
- **repositories.csv**: Contains information about public repositories for each user.
- **TDSProject1.ipynb**: Python code file used to scrape and analyse the data.
- **README.md**: This file.

## User Fields
- **login**: User GitHub ID
- **name**: Full name
- **company**: Company they work at
- **location**: City they are in
- **email**: Email address
- **hireable**: Whether they are open to being hired
- **bio**: Short bio
- **public_repos**: Number of public repositories
- **followers**: Number of followers
- **following**: Number of people they are following
- **created_at**: When they joined GitHub

## Repository Fields
- **login**: User GitHub ID
- **full_name**: Full name of the repository
- **created_at**: When the repository was created
- **stargazers_count**: Number of stars
- **watchers_count**: Number of watchers
- **language**: Programming language
- **has_projects**: Whether the repository has projects enabled
- **has_wiki**: Whether the repository has a wiki
- **license_name**: License type
"""

# Save README.md to file; the explicit encoding keeps the output independent of the platform default
with open('README.md', 'w', encoding='utf-8') as f:
    f.write(readme_content)

# Hand each generated artifact to the browser through Colab's download helper
from google.colab import files

for _artifact in ('users.csv', 'repositories.csv', 'README.md'):
    files.download(_artifact)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Import necessary libraries
import pandas as pd

# Bring users.csv into the Colab runtime through the manual upload prompt
from google.colab import files
uploaded = files.upload()  # prompts for a file to upload by hand

# Parse the CSV file into the users DataFrame
users_df = pd.read_csv(filepath_or_buffer='users.csv')
