In [27]:
params = {
    'engine': 'google_scholar',
    'q': 'law legal',
    # 'api_key': self.serpapi_key,
    'api_key' : "3611eaea5638a59ec95b6329077ddd9c8a71ece3",
    'num': 10
}


In [7]:
import json
import requests
from dataclasses import dataclass
from typing import List, Dict, Optional

In [8]:
@dataclass
class SearchResult:
    title: str
    url: str
    snippet: str
    source: str
    relevance_score: float
    publication_date: Optional[str] = None

In [None]:
test_queries = [
        "What are the latest developments in AI liability law?",
        "Find research papers on Fourth Amendment and digital privacy",
        "Contract law cases involving force majeure during COVID-19",
        "Environmental law and carbon credit regulations",
        "Criminal procedure and Miranda rights exceptions"
    ]

In [28]:
def _calculate_relevance(self, text: str, context) -> float:
    """Calculate relevance score based on keyword matching and context"""
    text_lower = text.lower()
    score = 0.0
    
    # Keyword matching
    for keyword in context.keywords:
        if keyword.lower() in text_lower:
            score += 0.1
    
    # Legal domain matching
    domain_terms = self.legal_domains.get(context.legal_domain, [])
    for term in domain_terms:
        if term in text_lower:
            score += 0.15
    
    # Boost for academic terms
    academic_terms = ['journal', 'review', 'law review', 'university', 'court', 'case']
    for term in academic_terms:
        if term in text_lower:
            score += 0.1
    
    return min(score, 1.0)  # Cap at 1.0

In [29]:
response = requests.get('https://serpapi.com/search', params=params)

In [30]:
response

<Response [401]>

In [32]:
response = requests.get('https://google.serper.dev', params=params)
data = response.json()
results = []
    
for item in data.get('organic_results', []):
    print(1)
    results.append(SearchResult(
        title=item.get('title', ''),
        url=item.get('link', ''),
        snippet=item.get('snippet', ''),
        source='Google Scholar',
        relevance_score=_calculate_relevance(item.get('title', '') + ' ' + item.get('snippet', ''), "What are the latest developments in AI liability law?"),
        publication_date=item.get('publication_info', {}).get('summary', '')
    ))
            

In [33]:
results

[]

In [31]:
import requests
import json
from typing import Dict, List, Optional
import time
from urllib.parse import quote_plus

class AuthoritativeLegalSearch:
    def __init__(self, api_key: str):
        """
        Initialize Serper API client for authoritative legal research sources
        
        Args:
            api_key: Your Serper API key from https://serper.dev/
        """
        self.api_key = api_key
        self.base_url = "https://google.serper.dev"
        
        # Authoritative legal and academic sources
        self.legal_sources = {
            'researchgate': 'site:researchgate.net',
            'ssrn': 'site:ssrn.com OR site:papers.ssrn.com',
            'jstor': 'site:jstor.org',
            'heinonline': 'site:heinonline.org',
            'westlaw': 'site:westlaw.com OR site:next.westlaw.com',
            'lexisnexis': 'site:lexisnexis.com',
            'cambridge': 'site:cambridge.org',
            'oxford': 'site:academic.oup.com OR site:oxfordlawtrove.com',
            'taylor_francis': 'site:tandfonline.com',
            'springer': 'site:springer.com OR site:link.springer.com',
            'wiley': 'site:onlinelibrary.wiley.com',
            'sage': 'site:journals.sagepub.com',
            'harvard_law': 'site:harvardlawreview.org',
            'yale_law': 'site:yalelawjournal.org',
            'stanford_law': 'site:stanfordlawreview.org',
            'columbia_law': 'site:columbialawreview.org',
            'chicago_law': 'site:lawreview.uchicago.edu',
            'nyu_law': 'site:nyulawreview.org',
            'penn_law': 'site:pennlawreview.com',
            'georgetown_law': 'site:georgetownlawjournal.org',
            'cornell_law': 'site:lawschool.cornell.edu',
            'bepress': 'site:bepress.com',
            'elsevier': 'site:sciencedirect.com',
            'proquest': 'site:proquest.com',
            'hathitrust': 'site:hathitrust.org',
            'google_scholar': 'site:scholar.google.com'
        }
        
        # Legal research keywords for enhanced queries
        self.legal_keywords = [
            '"law review"', '"legal journal"', '"legal analysis"',
            '"case law"', '"legal research"', '"jurisprudence"',
            '"legal scholarship"', '"court decision"', '"legal doctrine"',
            '"statutory interpretation"', '"constitutional law"',
            '"legal precedent"', '"judicial opinion"'
        ]
    
    def search_all_sources(self, query: str, num_results: int = 20) -> Dict:
        """
        Search across all authoritative legal sources
        
        Args:
            query: Legal research query
            num_results: Number of results per source type
        
        Returns:
            Combined results from all sources
        """
        
        # Create comprehensive site search query
        site_queries = ' OR '.join(self.legal_sources.values())
        legal_terms = ' OR '.join(self.legal_keywords[:5])  # Use top 5 legal keywords
        
        enhanced_query = f'({query}) AND ({site_queries}) AND ({legal_terms})'
        
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        
        payload = {
            'q': enhanced_query,
            'num': num_results,
            'gl': 'us',
            'hl': 'en'
        }
        
        try:
            response = requests.post(f"{self.base_url}/search", 
                                   headers=headers, 
                                   data=json.dumps(payload))
            
            if response.status_code == 200:
                return response.json()
            else:
                return {"error": f"HTTP {response.status_code}"}
                
        except Exception as e:
            return {"error": str(e)}
    
    def search_specific_source(self, query: str, source: str, num_results: int = 10) -> Dict:
        """
        Search a specific authoritative source
        
        Args:
            query: Legal research query
            source: Source key (e.g., 'researchgate', 'ssrn', 'jstor')
            num_results: Number of results to return
        
        Returns:
            Search results from specific source
        """
        
        if source not in self.legal_sources:
            return {"error": f"Unknown source: {source}. Available: {list(self.legal_sources.keys())}"}
        
        site_filter = self.legal_sources[source]
        legal_terms = ' OR '.join(self.legal_keywords[:3])
        
        enhanced_query = f'({query}) AND ({site_filter}) AND ({legal_terms})'
        
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        
        payload = {
            'q': enhanced_query,
            'num': num_results,
            'gl': 'us',
            'hl': 'en'
        }
        
        try:
            response = requests.post(f"{self.base_url}/search", 
                                   headers=headers, 
                                   data=json.dumps(payload))
            
            if response.status_code == 200:
                return response.json()
            else:
                return {"error": f"HTTP {response.status_code}"}
                
        except Exception as e:
            return {"error": str(e)}
    
    def search_academic_databases(self, query: str, num_results: int = 15) -> Dict:
        """
        Search specifically academic databases (JSTOR, SSRN, ResearchGate, etc.)
        
        Args:
            query: Academic legal research query
            num_results: Number of results to return
        
        Returns:
            Results from academic databases only
        """
        
        academic_sources = [
            self.legal_sources['researchgate'],
            self.legal_sources['ssrn'],
            self.legal_sources['jstor'],
            self.legal_sources['springer'],
            self.legal_sources['cambridge'],
            self.legal_sources['oxford'],
            self.legal_sources['taylor_francis'],
            self.legal_sources['wiley'],
            self.legal_sources['sage'],
            self.legal_sources['elsevier'],
            self.legal_sources['bepress']
        ]
        
        site_queries = ' OR '.join(academic_sources)
        academic_terms = '"peer reviewed" OR "academic journal" OR "research paper" OR "scholarly article"'
        
        enhanced_query = f'({query}) AND ({site_queries}) AND ({academic_terms})'
        
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        
        payload = {
            'q': enhanced_query,
            'num': num_results,
            'gl': 'us',
            'hl': 'en'
        }
        
        try:
            response = requests.post(f"{self.base_url}/search", 
                                   headers=headers, 
                                   data=json.dumps(payload))
            
            if response.status_code == 200:
                return response.json()
            else:
                return {"error": f"HTTP {response.status_code}"}
                
        except Exception as e:
            return {"error": str(e)}
    
    def search_law_reviews(self, query: str, num_results: int = 15) -> Dict:
        """
        Search top law reviews and journals
        
        Args:
            query: Legal research query
            num_results: Number of results to return
        
        Returns:
            Results from prestigious law reviews
        """
        
        law_review_sources = [
            self.legal_sources['harvard_law'],
            self.legal_sources['yale_law'],
            self.legal_sources['stanford_law'],
            self.legal_sources['columbia_law'],
            self.legal_sources['chicago_law'],
            self.legal_sources['nyu_law'],
            self.legal_sources['penn_law'],
            self.legal_sources['georgetown_law'],
            self.legal_sources['cornell_law']
        ]
        
        site_queries = ' OR '.join(law_review_sources)
        law_review_terms = '"law review" OR "legal journal" OR "law quarterly" OR "law forum"'
        
        enhanced_query = f'({query}) AND ({site_queries}) AND ({law_review_terms})'
        
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        
        payload = {
            'q': enhanced_query,
            'num': num_results,
            'gl': 'us',
            'hl': 'en'
        }
        
        try:
            response = requests.post(f"{self.base_url}/search", 
                                   headers=headers, 
                                   data=json.dumps(payload))
            
            if response.status_code == 200:
                return response.json()
            else:
                return {"error": f"HTTP {response.status_code}"}
                
        except Exception as e:
            return {"error": str(e)}
    
    def search_google_scholar_legal(self, query: str, num_results: int = 10) -> Dict:
        """
        Search Google Scholar specifically for legal academic papers
        
        Args:
            query: Academic legal search query
            num_results: Number of results to return
        
        Returns:
            Google Scholar results for legal research
        """
        
        # Enhanced query for legal academic papers
        enhanced_query = f'{query} law legal "cited by" academic research'
        
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        
        payload = {
            'q': enhanced_query,
            'num': num_results
        }
        
        try:
            response = requests.post(f"{self.base_url}/scholar", 
                                   headers=headers, 
                                   data=json.dumps(payload))
            
            if response.status_code == 200:
                return response.json()
            else:
                return {"error": f"HTTP {response.status_code}"}
                
        except Exception as e:
            return {"error": str(e)}

def identify_source(url: str) -> str:
    """
    Identify the source/platform from URL
    
    Args:
        url: The URL to analyze
    
    Returns:
        Source name
    """
    
    source_mapping = {
        'researchgate.net': '🔬 ResearchGate',
        'ssrn.com': '📚 SSRN',
        'papers.ssrn.com': '📚 SSRN Papers',
        'jstor.org': '📖 JSTOR',
        'heinonline.org': '⚖️ HeinOnline',
        'westlaw.com': '⚖️ Westlaw',
        'lexisnexis.com': '📑 LexisNexis',
        'cambridge.org': '🎓 Cambridge',
        'academic.oup.com': '🎓 Oxford Academic',
        'oxfordlawtrove.com': '⚖️ Oxford Law Trove',
        'tandfonline.com': '📚 Taylor & Francis',
        'springer.com': '📚 Springer',
        'link.springer.com': '📚 Springer Link',
        'onlinelibrary.wiley.com': '📚 Wiley Online',
        'journals.sagepub.com': '📚 SAGE Journals',
        'harvardlawreview.org': '🏛️ Harvard Law Review',
        'yalelawjournal.org': '🏛️ Yale Law Journal',
        'stanfordlawreview.org': '🏛️ Stanford Law Review',
        'columbialawreview.org': '🏛️ Columbia Law Review',
        'lawreview.uchicago.edu': '🏛️ Chicago Law Review',
        'nyulawreview.org': '🏛️ NYU Law Review',
        'sciencedirect.com': '🔬 ScienceDirect',
        'bepress.com': '📚 bepress',
        'proquest.com': '📚 ProQuest'
    }
    
    for domain, source in source_mapping.items():
        if domain in url.lower():
            return source
    
    return '🌐 Academic Source'

def format_authoritative_results(results: Dict, search_type: str = "Authoritative Sources") -> str:
    """
    Format search results with source identification and academic metrics
    
    Args:
        results: Search results from Serper API
        search_type: Type of search performed
    
    Returns:
        Formatted string of results with source attribution
    """
    
    if "error" in results:
        return f"❌ Search Error: {results['error']}"
    
    output = f"\n📚 {search_type.upper()}\n"
    output += "=" * 70 + "\n"
    
    # Handle different result structures
    if search_type.lower() == "scholar":
        organic_results = results.get('organic', [])
    else:
        organic_results = results.get('organic', [])
    
    if not organic_results:
        return f"❌ No authoritative sources found for {search_type} search."
    
    for i, result in enumerate(organic_results, 1):
        title = result.get('title', 'No Title')
        url = result.get('link', 'No URL')
        source = identify_source(url)
        
        output += f"\n{i}. {title}\n"
        output += f"   📍 Source: {source}\n"
        output += f"   🔗 URL: {url}\n"
        
        # Handle different snippet fields
        snippet = result.get('snippet') or result.get('description') or 'No description available'
        output += f"   📄 Abstract: {snippet}\n"
        
        # Scholar-specific academic metrics
        if search_type.lower() == "scholar":
            if 'publication_info' in result:
                pub_info = result['publication_info']
                summary = pub_info.get('summary', '')
                if summary:
                    output += f"   📅 Publication: {summary}\n"
            
            if 'cited_by' in result:
                citations = result['cited_by']
                total_citations = citations.get('total', 0)
                citations_link = citations.get('link', '')
                output += f"   📊 Citations: {total_citations}\n"
                if citations_link:
                    output += f"   🔍 Citation Link: {citations_link}\n"
            
            if 'related_pages' in result:
                related = result['related_pages']
                if 'link' in related:
                    output += f"   🔗 Related Articles: {related['link']}\n"
        
        # Check for PDF availability
        if 'pdf' in url.lower() or 'filetype:pdf' in str(result):
            output += f"   📄 Format: PDF Available\n"
        
        # Check for DOI or academic identifiers
        if 'doi.org' in url or 'doi:' in snippet.lower():
            output += f"   🆔 DOI: Available\n"
        
        output += "   " + "-" * 60 + "\n"
    
    # Search metadata
    if 'searchInformation' in results:
        search_info = results['searchInformation']
        total_results = search_info.get('totalResults', 'Unknown')
        search_time = search_info.get('searchTime', 'Unknown')
        output += f"\n📊 Total Authoritative Results: {total_results}"
        output += f" | ⏱️ Search Time: {search_time}s\n"
    
    return output

def demo_authoritative_searches():
    """
    Demo function showing searches across authoritative legal sources
    """
    
    # Initialize with your Serper API key
    API_KEY = "your-serper-api-key-here"
    
    searcher = AuthoritativeLegalSearch(API_KEY)
    
    # Sample legal research queries
    test_queries = [
        "artificial intelligence liability tort law",
        "constitutional privacy digital surveillance",
        "environmental law climate change litigation",
        "corporate governance fiduciary duty",
        "intellectual property fair use doctrine"
    ]
    
    print("🎓 AUTHORITATIVE LEGAL RESEARCH SEARCH DEMO")
    print("=" * 60)
    
    for query in test_queries:
        print(f"\n🔍 Research Query: '{query}'")
        print("-" * 50)
        
        # 1. Search all authoritative sources
        print("\n1️⃣ ALL AUTHORITATIVE SOURCES:")
        all_results = searcher.search_all_sources(query, num_results=8)
        print(format_authoritative_results(all_results, "All Authoritative Sources"))
        
        # 2. Academic databases only
        print("\n2️⃣ ACADEMIC DATABASES (JSTOR, SSRN, ResearchGate, etc.):")
        academic_results = searcher.search_academic_databases(query, num_results=6)
        print(format_authoritative_results(academic_results, "Academic Databases"))
        
        # 3. Top law reviews
        print("\n3️⃣ PRESTIGIOUS LAW REVIEWS:")
        law_review_results = searcher.search_law_reviews(query, num_results=5)
        print(format_authoritative_results(law_review_results, "Law Reviews"))
        
        # 4. Google Scholar for citations
        print("\n4️⃣ GOOGLE SCHOLAR (WITH CITATIONS):")
        scholar_results = searcher.search_google_scholar_legal(query, num_results=5)
        print(format_authoritative_results(scholar_results, "Scholar"))
        
        print("\n" + "=" * 80)
        time.sleep(3)  # Rate limiting

def search_specific_source_demo():
    """
    Demo searching specific authoritative sources
    """
    
    API_KEY = "your-serper-api-key-here"
    searcher = AuthoritativeLegalSearch(API_KEY)
    
    query = "pink tribunal"
    
    # Available sources
    sources = ['researchgate', 'ssrn', 'jstor', 'harvard_law', 'oxford']
    
    print(f"🔍 Searching '{query}' across specific sources:")
    print("=" * 60)
    
    for source in sources:
        print(f"\n📚 Searching {source.upper().replace('_', ' ')}:")
        results = searcher.search_specific_source(query, source, num_results=5)
        print(format_authoritative_results(results, f"{source} Results"))
        time.sleep(2)

if __name__ == "__main__":
    print("🎓 Choose search type:")
    print("1. Comprehensive authoritative search demo")
    print("2. Search specific sources (ResearchGate, SSRN, etc.)")
    print("3. Academic databases only")
    print("4. Law reviews only")
    
    choice = input("\nEnter choice (1-4): ")
    
    if choice == "1":
        demo_authoritative_searches()
    elif choice == "2":
        search_specific_source_demo()
    elif choice == "3":
        API_KEY = "your-serper-api-key-here"
        searcher = AuthoritativeLegalSearch(API_KEY)
        query = input("Enter your legal research query: ")
        results = searcher.search_academic_databases(query, 10)
        print(format_authoritative_results(results, "Academic Databases"))
    elif choice == "4":
        API_KEY = "your-serper-api-key-here"
        searcher = AuthoritativeLegalSearch(API_KEY)
        query = input("Enter your legal research query: ")
        results = searcher.search_law_reviews(query, 10)
        print(format_authoritative_results(results, "Law Reviews"))
    else:
        print("Running comprehensive demo...")
        demo_authoritative_searches()

🎓 Choose search type:
1. Comprehensive authoritative search demo
2. Search specific sources (ResearchGate, SSRN, etc.)
3. Academic databases only
4. Law reviews only
🔍 Searching 'pink tribunal' across specific sources:

📚 Searching RESEARCHGATE:
❌ Search Error: HTTP 403

📚 Searching SSRN:
❌ Search Error: HTTP 403

📚 Searching JSTOR:
❌ Search Error: HTTP 403

📚 Searching HARVARD LAW:
❌ Search Error: HTTP 403

📚 Searching OXFORD:
❌ Search Error: HTTP 403
